In [None]:
# Mount Google Drive at /content/drive. Colab opens a window asking you to grant
# permissions; force_remount=True asks for a fresh mount even if one already exists.
from google.colab import drive
drive.mount("/content/drive", force_remount=True)

Mounted at /content/drive


In [None]:


# Copy the starter folder from the mounted Drive into /content/.
# The folder name contains a space, so the source path must stay quoted.
!cp -r "/content/drive/MyDrive/assignment4_starter_sp25 3" /content/


In [None]:


# Work from the copied starter directory: later cells use relative paths such as
# 'data/voc2007.txt' and 'checkpoints/...'.
%cd /content/assignment4_starter_sp25 3


# Directory handed to download_data.sh as its single <data_path> argument; later cells
# build the image roots from it.
VOC_PATH = "/content/VOC_DATA"


# Make the script executable and strip carriage returns (CRLF line endings) from it.
!chmod u+x download_data.sh
!sed -i 's/\r//g' download_data.sh


# Print the script so its steps are visible in the notebook output.
!cat download_data.sh


# Run the download; IPython substitutes the Python variable VOC_PATH into the command.
# NOTE(review): the script is not guarded against re-runs — confirm before re-executing.
!bash download_data.sh $VOC_PATH


/content/assignment4_starter_sp25 3
#!/usr/bin/env bash

if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <data_path>"
    exit 1
fi

DATA_DIR="$1"

mkdir -p "$DATA_DIR"

cd "$DATA_DIR" || { echo "Could not change directory to '$DATA_DIR'"; exit 1; }
# cd data

# download train
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
tar -xf VOCtrainval_06-Nov-2007.tar
mv VOCdevkit VOCdevkit_2007
rm VOCtrainval_06-Nov-2007.tar

# download test and combine into same directory
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
tar -xf VOCtest_06-Nov-2007.tar
mv VOCdevkit/VOC2007 VOCdevkit_2007/VOC2007test
rmdir VOCdevkit
rm VOCtest_06-Nov-2007.tar

# MIRROR Links (comment out above and uncomment out below if host.robots.ox.au.uk is down)

# wget http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
# tar -xf VOCtrainval_06-Nov-2007.tar
# mv VOCdevkit VOCdevkit_2007

# wget http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
# tar -xf V

In [None]:
import os
import random

import cv2
import numpy as np

import torch
from torch.utils.data import DataLoader

from src.resnet_yolo import resnet50
import yolo_loss
from src.dataset import VocDetectorDataset
from src.eval_voc import evaluate
from src.predict import predict_image
from src.config import VOC_CLASSES, COLORS
from kaggle_submission import output_submission_csv

import matplotlib.pyplot as plt
import collections

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Initialization

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:

# B and S are passed to the datasets (S) and to YoloLoss (S, B) further down.
# Presumably B is the number of box predictions per grid cell, matching the
# S x S x (B*5 + C) output described in the "Reading Pascal Data" section — TODO confirm.
B = 2
# Output grid size (the notebook text below states a 14x14 grid for this backbone).
S = 14

## **Start from here if you modified yolo_loss.py and wish to retrain**

In [None]:


# Training hyperparameters consumed by the cells below.
learning_rate = 0.001  # initial optimizer learning rate
num_epochs = 50        # number of passes of the training loop
batch_size = 16        # batch size for both the train and test DataLoaders
# Weights handed to YoloLoss; presumably they scale the box-regression and
# no-object terms respectively — confirm against yolo_loss.py.
lambda_coord = 10
lambda_noobj = 0.05



## Reading Pascal Data

Since Pascal is a small dataset (5000 in train+val) we have combined the train and val splits to train our detector. This is not typically a good practice, but we will make an exception in this case to be able to get reasonable detection results with a comparatively small object detection dataset.

The train dataset loader also uses a variety of data augmentation techniques, including random shift, scaling, crop, and flips. Data augmentation is slightly more complicated for detection datasets since the bounding box annotations must be kept consistent throughout the transformations.

Since the output of the detector network we train is an SxSx(B*5+C) tensor, we use an encoder to convert the original bounding box coordinates into relative grid bounding box coordinates corresponding to the expected output. We also use a decoder, which allows us to convert in the opposite direction, back into image-coordinate bounding boxes.

In [None]:

# Annotation list and JPEG directory for the combined train+val split.
annotation_file_train = 'data/voc2007.txt'
file_root_train = os.path.join(VOC_PATH, 'VOCdevkit_2007/VOC2007/JPEGImages/')

# Training dataset (train=True) wrapped in a shuffling loader.
train_dataset = VocDetectorDataset(
    root_img_dir=file_root_train,
    dataset_file=annotation_file_train,
    train=True,
    S=S,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)
print('Loaded %d train images' % len(train_dataset))

Initializing dataset
Loaded 5011 train images


In [None]:
# Annotation list and JPEG directory for the test split.
annotation_file_test = 'data/voc2007test.txt'
file_root_test = os.path.join(VOC_PATH, 'VOCdevkit_2007/VOC2007test/JPEGImages/')

# Test dataset (train=False) wrapped in a loader that keeps a fixed order.
test_dataset = VocDetectorDataset(
    root_img_dir=file_root_test,
    dataset_file=annotation_file_test,
    train=False,
    S=S,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2,
)
print('Loaded %d test images' % len(test_dataset))

Initializing dataset
Loaded 4950 test images


In [None]:
# Pull the first training sample (exercises the dataset's __getitem__ once).
# Note: the name `data` is reassigned by the training loop further down.
data = train_dataset[0]

## Initializing the network

To implement Yolo we will rely on a pretrained classifier as the backbone for our detection network. PyTorch offers a variety of models which are pretrained on ImageNet in the [`torchvision.models`](https://pytorch.org/docs/stable/torchvision/models.html) package. In particular, we will use the ResNet50 architecture as a base for our detector. This is different from the base architecture in the Yolo paper and also results in a different output grid size (14x14 instead of 7x7).

Models are typically pretrained on ImageNet since the dataset is very large (> 1 million images) and widely used. The pretrained model provides a very useful weight initialization for our detector, so that the network is able to learn quickly and effectively.

In [None]:
# Set load_network_path to a checkpoint file to resume from saved weights;
# leave it as None to start from the pretrained backbone.
load_network_path = None
pretrained = True


if load_network_path is not None:
    print('Loading saved network from {}'.format(load_network_path))
    net = resnet50().to(device)
    # map_location lets a checkpoint saved on a GPU load on whatever `device` is,
    # including a CPU-only runtime.
    net.load_state_dict(torch.load(load_network_path, map_location=device))
else:
    print('Load pre-trained model')
    net = resnet50(pretrained=pretrained).to(device)

Load pre-trained model


## Set up training tools

In [None]:



from importlib import reload
reload(yolo_loss) # re-import yolo_loss so edits to its implementation take effect without restarting

# Loss configured with the grid size, B, and the two lambda weights defined earlier.
criterion = yolo_loss.YoloLoss(S, B, lambda_coord, lambda_noobj)
# SGD over all network parameters, with momentum and weight decay.
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)

## Train detector

In [None]:
# Training loop: one pass over train_loader per epoch, a full test-set loss
# computation after every epoch, mAP evaluation every 5 epochs, and checkpointing.
best_test_loss = np.inf
# Track the scheduled rate in a cell-local variable so the decay below persists
# across epochs and the global `learning_rate` hyperparameter is left untouched.
current_lr = learning_rate

os.makedirs('./checkpoints', exist_ok=True)
for epoch in range(num_epochs):
    # Step decay: divide the learning rate by 10 at the start of epochs 30 and 40
    # (0-based). The decayed value is what gets written to the optimizer below;
    # previously the optimizer was reset to the initial rate every epoch, which
    # silently cancelled the decay.
    if epoch in [30, 40]:
        current_lr *= 0.1
        print(f"📉 Learning rate decayed to {current_lr}")

    for param_group in optimizer.param_groups:
        param_group['lr'] = current_lr

    net.train()

    print('\n\nStarting epoch %d / %d' % (epoch + 1, num_epochs))
    print('Learning Rate for this epoch: {}'.format(current_lr))

    # Running sum of every loss component returned by the criterion.
    total_loss = collections.defaultdict(int)

    for i, data in enumerate(train_loader):
        data = (item.to(device) for item in data)
        images, target_boxes, target_cls, has_object_map = data
        pred = net(images)
        loss_dict = criterion(pred, target_boxes, target_cls, has_object_map)
        for key in loss_dict:
            total_loss[key] += loss_dict[key].item()

        optimizer.zero_grad()
        loss_dict['total_loss'].backward()
        optimizer.step()

        if (i+1) % 50 == 0:
            outstring = 'Epoch [%d/%d], Iter [%d/%d], Loss: ' % ((epoch+1, num_epochs, i+1, len(train_loader)))
            # key[:-5] drops the trailing "_loss" from each key; values are running means.
            outstring += ', '.join( "%s=%.3f" % (key[:-5], val / (i+1)) for key, val in total_loss.items() )
            print(outstring)

    # Full mAP evaluation on the test split every 5 epochs.
    if (epoch + 1) % 5 == 0:
        test_aps = evaluate(net, test_dataset_file=annotation_file_test, img_root=file_root_test)
        print(epoch, test_aps)

    # Mean total loss over the test loader, computed without gradients.
    with torch.no_grad():
        test_loss = 0.0
        net.eval()
        for i, data in enumerate(test_loader):
            data = (item.to(device) for item in data)
            images, target_boxes, target_cls, has_object_map = data

            pred = net(images)
            loss_dict = criterion(pred, target_boxes, target_cls, has_object_map)
            test_loss += loss_dict['total_loss'].item()
        test_loss /= len(test_loader)

    # Keep the weights with the lowest test loss seen so far.
    if best_test_loss > test_loss:
        best_test_loss = test_loss
        print('Updating best test loss: %.5f' % best_test_loss)
        torch.save(net.state_dict(),'checkpoints/best_detector.pth')

    # Snapshots at fixed epochs; the checkpoint-evaluation cell later reads these files.
    if (epoch+1) in [5, 10, 20, 30, 40, 50]:
        torch.save(net.state_dict(),'checkpoints/detector_epoch_%d.pth' % (epoch+1))

    # Always overwrite the rolling "latest" checkpoint.
    torch.save(net.state_dict(),'checkpoints/detector.pth')





Starting epoch 1 / 50
Learning Rate for this epoch: 0.001
Epoch [1/50], Iter [50/314], Loss: total=8.255, reg=0.377, containing_obj=0.116, no_obj=0.569, cls=4.340
Epoch [1/50], Iter [100/314], Loss: total=7.024, reg=0.332, containing_obj=0.110, no_obj=0.561, cls=3.561
Epoch [1/50], Iter [150/314], Loss: total=6.301, reg=0.305, containing_obj=0.099, no_obj=0.557, cls=3.126
Epoch [1/50], Iter [200/314], Loss: total=5.880, reg=0.290, containing_obj=0.091, no_obj=0.553, cls=2.865
Epoch [1/50], Iter [250/314], Loss: total=5.546, reg=0.277, containing_obj=0.085, no_obj=0.550, cls=2.659
Epoch [1/50], Iter [300/314], Loss: total=5.278, reg=0.268, containing_obj=0.080, no_obj=0.546, cls=2.488
Updating best test loss: 3.75521


Starting epoch 2 / 50
Learning Rate for this epoch: 0.001
Epoch [2/50], Iter [50/314], Loss: total=3.632, reg=0.211, containing_obj=0.047, no_obj=0.525, cls=1.445
Epoch [2/50], Iter [100/314], Loss: total=3.586, reg=0.209, containing_obj=0.045, no_obj=0.524, cls=1.428
E

100%|██████████| 4950/4950 [07:58<00:00, 10.34it/s]


---class aeroplane ap 5.8275567676873635e-05---
---class bicycle ap 1.4344692603199104e-05---
---class bird ap 0.00019783444365598877---
---class boat ap 7.37676813755489e-06---
---class bottle ap 0.0---
---class bus ap 8.997768553398757e-06---
---class car ap 0.03068397045149418---
---class cat ap 0.0007469750606559947---
---class chair ap 0.0021155232763318873---
---class cow ap 1.2951462064649553e-05---
---class diningtable ap 1.212682720968982e-06---
---class dog ap 0.0017059333106112246---
---class horse ap 5.433093625242588e-07---
---class motorbike ap 7.866954072722124e-06---
---class person ap 0.18503798992716525---
---class pottedplant ap 4.296950196628441e-06---
---class sheep ap 6.24581530374649e-07---
---class sofa ap 1.6904770357147084e-06---
---class train ap 0.00011001161719854997---
---class tvmonitor ap 4.3766554893618786e-05---
---map 0.011038009292798068---
4 [np.float64(5.8275567676873635e-05), np.float64(1.4344692603199104e-05), np.float64(0.00019783444365598877), 

100%|██████████| 4950/4950 [07:52<00:00, 10.47it/s]


---class aeroplane ap 0.0009127098074994672---
---class bicycle ap 0.0012399706922503067---
---class bird ap 0.005413227324487063---
---class boat ap 5.9260061240864666e-05---
---class bottle ap 2.3544568927196424e-05---
---class bus ap 7.578351736565267e-05---
---class car ap 0.3562414777690971---
---class cat ap 0.05146810771727404---
---class chair ap 0.07848240718085205---
---class cow ap 2.235571065450814e-06---
---class diningtable ap 1.9763334074458357e-05---
---class dog ap 0.027692677113272127---
---class horse ap 0.000706924681945046---
---class motorbike ap 4.2416915866047385e-05---
---class person ap 0.27921034500486963---
---class pottedplant ap 0.0002790574792358118---
---class sheep ap 1.0142934229157284e-06---
---class sofa ap 7.746461416424976e-07---
---class train ap 0.0031754586076088195---
---class tvmonitor ap 2.9435659535387545e-05---
---map 0.040253829597301546---
9 [np.float64(0.0009127098074994672), np.float64(0.0012399706922503067), np.float64(0.00541322732448

100%|██████████| 4950/4950 [07:44<00:00, 10.66it/s]


---class aeroplane ap 0.14052711657595132---
---class bicycle ap 0.27957467444999423---
---class bird ap 0.24101183994830133---
---class boat ap 0.00024763189142061815---
---class bottle ap 0.007065160567276822---
---class bus ap 0.041759066905991085---
---class car ap 0.45462776959827594---
---class cat ap 0.3022043351570481---
---class chair ap 0.15350804976464288---
---class cow ap 5.9409984590535255e-05---
---class diningtable ap 0.0006026786960478348---
---class dog ap 0.4371194480194809---
---class horse ap 0.34412771362946865---
---class motorbike ap 0.0005599293367640845---
---class person ap 0.3696637067412464---
---class pottedplant ap 0.03462523659897959---
---class sheep ap 1.649593375233005e-06---
---class sofa ap 1.2317630635316016e-05---
---class train ap 0.28114698338115673---
---class tvmonitor ap 0.18678367235055557---
---map 0.16376141954106016---
14 [np.float64(0.14052711657595132), np.float64(0.27957467444999423), np.float64(0.24101183994830133), np.float64(0.00024

100%|██████████| 4950/4950 [07:18<00:00, 11.30it/s]


---class aeroplane ap 0.2810102391862599---
---class bicycle ap 0.37776946151949653---
---class bird ap 0.30040842249727695---
---class boat ap 0.034692778844255215---
---class bottle ap 0.02995498962435035---
---class bus ap 0.22743088946912887---
---class car ap 0.49852880507645564---
---class cat ap 0.578611403838408---
---class chair ap 0.1741811885013333---
---class cow ap 0.033621131982354974---
---class diningtable ap 0.027715199516068434---
---class dog ap 0.5681354633310607---
---class horse ap 0.48707892889899973---
---class motorbike ap 0.23173636146816823---
---class person ap 0.40806179551570215---
---class pottedplant ap 0.06537917540750891---
---class sheep ap 0.054159352720046676---
---class sofa ap 9.09454690967296e-06---
---class train ap 0.4876104366361264---
---class tvmonitor ap 0.34363774473423286---
---map 0.2604866431657072---
19 [np.float64(0.2810102391862599), np.float64(0.37776946151949653), np.float64(0.30040842249727695), np.float64(0.034692778844255215), n

100%|██████████| 4950/4950 [07:00<00:00, 11.77it/s]


---class aeroplane ap 0.36033980196351295---
---class bicycle ap 0.4686752175127973---
---class bird ap 0.44005446621912475---
---class boat ap 0.0728972100100551---
---class bottle ap 0.0736921426963177---
---class bus ap 0.3428578499958568---
---class car ap 0.5594940995155679---
---class cat ap 0.6506826859767545---
---class chair ap 0.23669901077698213---
---class cow ap 0.17494449377956642---
---class diningtable ap 0.24524360126545697---
---class dog ap 0.5959547208349525---
---class horse ap 0.5475662379544078---
---class motorbike ap 0.42491065655050986---
---class person ap 0.43194339492035905---
---class pottedplant ap 0.1035957987766187---
---class sheep ap 0.15918553686777817---
---class sofa ap 4.1017016934900866e-05---
---class train ap 0.5907342754380501---
---class tvmonitor ap 0.35471938578018036---
---map 0.34171158019258924---
24 [np.float64(0.36033980196351295), np.float64(0.4686752175127973), np.float64(0.44005446621912475), np.float64(0.0728972100100551), np.float

100%|██████████| 4950/4950 [06:42<00:00, 12.29it/s]


---class aeroplane ap 0.4145011870633033---
---class bicycle ap 0.517996762727651---
---class bird ap 0.48694576691320446---
---class boat ap 0.12524129612301943---
---class bottle ap 0.12059307421631937---
---class bus ap 0.4131062953373788---
---class car ap 0.5469343897364944---
---class cat ap 0.6656257757173252---
---class chair ap 0.2867677621416955---
---class cow ap 0.25836956406916317---
---class diningtable ap 0.26617280849755526---
---class dog ap 0.668188154114628---
---class horse ap 0.5668952365723471---
---class motorbike ap 0.47926748141730025---
---class person ap 0.4655460857408257---
---class pottedplant ap 0.12612100049522423---
---class sheep ap 0.2673144868675549---
---class sofa ap 0.00886868469258856---
---class train ap 0.6049645860168597---
---class tvmonitor ap 0.4353386033885036---
---map 0.3862379500924471---
29 [np.float64(0.4145011870633033), np.float64(0.517996762727651), np.float64(0.48694576691320446), np.float64(0.12524129612301943), np.float64(0.1205

100%|██████████| 4950/4950 [06:32<00:00, 12.61it/s]


---class aeroplane ap 0.4626636658574136---
---class bicycle ap 0.5483468057402818---
---class bird ap 0.5055021217906759---
---class boat ap 0.14942425066681617---
---class bottle ap 0.09550812451847468---
---class bus ap 0.48707883958572273---
---class car ap 0.5776846754744505---
---class cat ap 0.6970827235581506---
---class chair ap 0.26625527374670804---
---class cow ap 0.3426554796190935---
---class diningtable ap 0.2621703577725708---
---class dog ap 0.6395306662425714---
---class horse ap 0.6263533714485485---
---class motorbike ap 0.5059464191617273---
---class person ap 0.487594354072373---
---class pottedplant ap 0.16226163973005628---
---class sheep ap 0.2830092111475413---
---class sofa ap 0.2692447373400573---
---class train ap 0.6438179960917111---
---class tvmonitor ap 0.46234391863176844---
---map 0.4237237316098357---
34 [np.float64(0.4626636658574136), np.float64(0.5483468057402818), np.float64(0.5055021217906759), np.float64(0.14942425066681617), np.float64(0.09550

100%|██████████| 4950/4950 [06:24<00:00, 12.87it/s]


---class aeroplane ap 0.47683684242060476---
---class bicycle ap 0.6043322391309855---
---class bird ap 0.5282057279529424---
---class boat ap 0.21379900182574463---
---class bottle ap 0.12087457706866193---
---class bus ap 0.5570994046584221---
---class car ap 0.6143656577987127---
---class cat ap 0.7194043628526606---
---class chair ap 0.26393780384896787---
---class cow ap 0.41833563908169347---
---class diningtable ap 0.30965205324929207---
---class dog ap 0.6497362522756969---
---class horse ap 0.6087018183208217---
---class motorbike ap 0.5435177324015106---
---class person ap 0.5066174329008037---
---class pottedplant ap 0.20824324822971532---
---class sheep ap 0.3947423759878921---
---class sofa ap 0.40140095072614057---
---class train ap 0.6608093115107274---
---class tvmonitor ap 0.4112354317543641---
---map 0.460592393199818---
39 [np.float64(0.47683684242060476), np.float64(0.6043322391309855), np.float64(0.5282057279529424), np.float64(0.21379900182574463), np.float64(0.12

100%|██████████| 4950/4950 [06:19<00:00, 13.03it/s]


---class aeroplane ap 0.4775275232035816---
---class bicycle ap 0.6125964006705313---
---class bird ap 0.5548932718469339---
---class boat ap 0.22537168569301697---
---class bottle ap 0.12276621472643473---
---class bus ap 0.547622260515457---
---class car ap 0.6220886022776231---
---class cat ap 0.7134512898491547---
---class chair ap 0.2839669189837997---
---class cow ap 0.4141020449472787---
---class diningtable ap 0.32335627375394926---
---class dog ap 0.6862954220361086---
---class horse ap 0.6627740704283844---
---class motorbike ap 0.5835831996208903---
---class person ap 0.5154440672217666---
---class pottedplant ap 0.21115004026019793---
---class sheep ap 0.4224876999339805---
---class sofa ap 0.42231714566307554---
---class train ap 0.661202303575046---
---class tvmonitor ap 0.4703433241533715---
---map 0.47666698796802914---
44 [np.float64(0.4775275232035816), np.float64(0.6125964006705313), np.float64(0.5548932718469339), np.float64(0.22537168569301697), np.float64(0.122766

100%|██████████| 4950/4950 [06:04<00:00, 13.60it/s]


---class aeroplane ap 0.5413454681322927---
---class bicycle ap 0.6559564251561015---
---class bird ap 0.5529299016147285---
---class boat ap 0.28314384028879114---
---class bottle ap 0.15366993969166245---
---class bus ap 0.5403833533424375---
---class car ap 0.6465623014892963---
---class cat ap 0.7297864848466433---
---class chair ap 0.3143417494841434---
---class cow ap 0.420306845352303---
---class diningtable ap 0.34389640346810363---
---class dog ap 0.6576663921892423---
---class horse ap 0.633145481059038---
---class motorbike ap 0.591707818388673---
---class person ap 0.5335828728208057---
---class pottedplant ap 0.20281587655024708---
---class sheep ap 0.403659252232655---
---class sofa ap 0.4500562650305758---
---class train ap 0.65854966570851---
---class tvmonitor ap 0.4420559458193249---
---map 0.4877781141332787---
49 [np.float64(0.5413454681322927), np.float64(0.6559564251561015), np.float64(0.5529299016147285), np.float64(0.28314384028879114), np.float64(0.153669939691

# View example predictions

In [None]:
!cp -r /content/assignment4_starter_sp25 2/checkpoints /content/drive/MyDrive/assignment4_backups/


cp: target '/content/drive/MyDrive/assignment4_backups/' is not a directory


In [None]:
from src.resnet_yolo import resnet50
import torch

# Checkpoint used for the visualisation. Note that this is the epoch-50 snapshot,
# not 'checkpoints/best_detector.pth'.
best_ckpt_path = 'checkpoints/detector_epoch_50.pth'

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the model and load the weights once; they do not change between images,
# so doing this inside the loop only repeated the same work.
net = resnet50()
net.load_state_dict(torch.load(best_ckpt_path, map_location=device))
net = net.to(device)
net.eval()

# Draw predictions on 7 randomly chosen test images.
for i in range(7):
    image_name = random.choice(test_dataset.fnames)
    image = cv2.imread(os.path.join(file_root_test, image_name))
    # OpenCV loads BGR; convert to RGB for matplotlib.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    print('predicting...')
    result = predict_image(net, image_name, root_img_directory=file_root_test)

    for left_up, right_bottom, class_name, _, prob in result:
        # Only draw boxes whose score reaches 0.7.
        if prob < 0.7:
            continue
        color = COLORS[VOC_CLASSES.index(class_name)]
        cv2.rectangle(image, left_up, right_bottom, color, 2)
        label = class_name + str(round(prob, 2))
        text_size, baseline = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.4, 1)
        # Anchor the label just above the box's top-left corner.
        p1 = (left_up[0], left_up[1] - text_size[1])
        # Filled rectangle behind the label text so it stays readable.
        cv2.rectangle(image, (p1[0] - 2 // 2, p1[1] - 2 - baseline),
                      (p1[0] + text_size[0], p1[1] + text_size[1]), color, -1)
        cv2.putText(image, label, (p1[0], p1[1] + baseline),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.4, (255, 255, 255), 1, 8)

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    plt.axis("off")
    plt.show()


Output hidden; open in https://colab.research.google.com to view.

## Evaluate on Test

To evaluate detection results we use mAP (mean of average precision over each class)

In [None]:
# Evaluate the current `net` on the test split; the result is kept in test_aps
# and later passed to output_submission_csv.
test_aps = evaluate(net, test_dataset_file=annotation_file_test, img_root=file_root_test)

---Evaluate model on test samples---


100%|██████████| 4950/4950 [06:06<00:00, 13.50it/s]


---class aeroplane ap 0.5413454681322927---
---class bicycle ap 0.6559564251561015---
---class bird ap 0.5529299016147285---
---class boat ap 0.28314384028879114---
---class bottle ap 0.15366993969166245---
---class bus ap 0.5403833533424375---
---class car ap 0.6465623014892963---
---class cat ap 0.7297864848466433---
---class chair ap 0.3143417494841434---
---class cow ap 0.420306845352303---
---class diningtable ap 0.34389640346810363---
---class dog ap 0.6576663921892423---
---class horse ap 0.633145481059038---
---class motorbike ap 0.591707818388673---
---class person ap 0.5335828728208057---
---class pottedplant ap 0.20281587655024708---
---class sheep ap 0.403659252232655---
---class sofa ap 0.4500562650305758---
---class train ap 0.65854966570851---
---class tvmonitor ap 0.4420559458193249---
---map 0.4877781141332787---


### Cell added to get intermediate mAP values for students

In [None]:
# Evaluate each epoch snapshot written by the training loop, plus the rolling
# latest checkpoint, to obtain intermediate mAP values.
network_paths = ['./checkpoints/detector_epoch_%d.pth' % epoch for epoch in [5, 10, 20, 30, 40, 50]]+['./checkpoints/detector.pth']
for load_network_path in network_paths:
    print('Loading saved network from {}'.format(load_network_path))
    net_loaded =  resnet50().to(device)
    # map_location lets GPU-saved checkpoints load on the current device (CPU included),
    # matching how the example-prediction cell loads its checkpoint.
    net_loaded.load_state_dict(torch.load(load_network_path, map_location=device))
    # Put the freshly built model in inference mode, as the prediction cell does.
    net_loaded.eval()
    evaluate(net_loaded, test_dataset_file=annotation_file_test, img_root=file_root_test)


Loading saved network from ./checkpoints/detector_epoch_5.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [08:09<00:00, 10.12it/s]


---class aeroplane ap 5.8275567676873635e-05---
---class bicycle ap 1.4344692603199104e-05---
---class bird ap 0.00019783444365598877---
---class boat ap 7.37676813755489e-06---
---class bottle ap 0.0---
---class bus ap 8.997768553398757e-06---
---class car ap 0.03068397045149418---
---class cat ap 0.0007469750606559947---
---class chair ap 0.0021155232763318873---
---class cow ap 1.2951462064649553e-05---
---class diningtable ap 1.212682720968982e-06---
---class dog ap 0.0017059333106112246---
---class horse ap 5.433093625242588e-07---
---class motorbike ap 7.866954072722124e-06---
---class person ap 0.18503798992716525---
---class pottedplant ap 4.296950196628441e-06---
---class sheep ap 6.24581530374649e-07---
---class sofa ap 1.6904770357147084e-06---
---class train ap 0.00011001161719854997---
---class tvmonitor ap 4.3766554893618786e-05---
---map 0.011038009292798068---
Loading saved network from ./checkpoints/detector_epoch_10.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [08:09<00:00, 10.12it/s]


---class aeroplane ap 0.0009127098074994672---
---class bicycle ap 0.0012399706922503067---
---class bird ap 0.005413227324487063---
---class boat ap 5.9260061240864666e-05---
---class bottle ap 2.3544568927196424e-05---
---class bus ap 7.578351736565267e-05---
---class car ap 0.3562414777690971---
---class cat ap 0.05146810771727404---
---class chair ap 0.07848240718085205---
---class cow ap 2.235571065450814e-06---
---class diningtable ap 1.9763334074458357e-05---
---class dog ap 0.027692677113272127---
---class horse ap 0.000706924681945046---
---class motorbike ap 4.2416915866047385e-05---
---class person ap 0.27921034500486963---
---class pottedplant ap 0.0002790574792358118---
---class sheep ap 1.0142934229157284e-06---
---class sofa ap 7.746461416424976e-07---
---class train ap 0.0031754586076088195---
---class tvmonitor ap 2.9435659535387545e-05---
---map 0.040253829597301546---
Loading saved network from ./checkpoints/detector_epoch_20.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [07:19<00:00, 11.25it/s]


---class aeroplane ap 0.2810102391862599---
---class bicycle ap 0.37776946151949653---
---class bird ap 0.30040842249727695---
---class boat ap 0.034692778844255215---
---class bottle ap 0.02995498962435035---
---class bus ap 0.22743088946912887---
---class car ap 0.49852880507645564---
---class cat ap 0.578611403838408---
---class chair ap 0.1741811885013333---
---class cow ap 0.033621131982354974---
---class diningtable ap 0.027715199516068434---
---class dog ap 0.5681354633310607---
---class horse ap 0.48707892889899973---
---class motorbike ap 0.23173636146816823---
---class person ap 0.40806179551570215---
---class pottedplant ap 0.06537917540750891---
---class sheep ap 0.054159352720046676---
---class sofa ap 9.09454690967296e-06---
---class train ap 0.4876104366361264---
---class tvmonitor ap 0.34363774473423286---
---map 0.2604866431657072---
Loading saved network from ./checkpoints/detector_epoch_30.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [06:41<00:00, 12.32it/s]


---class aeroplane ap 0.4145011870633033---
---class bicycle ap 0.517996762727651---
---class bird ap 0.48694576691320446---
---class boat ap 0.12524129612301943---
---class bottle ap 0.12059307421631937---
---class bus ap 0.4131062953373788---
---class car ap 0.5469343897364944---
---class cat ap 0.6656257757173252---
---class chair ap 0.2867677621416955---
---class cow ap 0.25836956406916317---
---class diningtable ap 0.26617280849755526---
---class dog ap 0.668188154114628---
---class horse ap 0.5668952365723471---
---class motorbike ap 0.47926748141730025---
---class person ap 0.4655460857408257---
---class pottedplant ap 0.12612100049522423---
---class sheep ap 0.2673144868675549---
---class sofa ap 0.00886868469258856---
---class train ap 0.6049645860168597---
---class tvmonitor ap 0.4353386033885036---
---map 0.3862379500924471---
Loading saved network from ./checkpoints/detector_epoch_40.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [06:20<00:00, 13.00it/s]


---class aeroplane ap 0.47683684242060476---
---class bicycle ap 0.6043322391309855---
---class bird ap 0.5282057279529424---
---class boat ap 0.21379900182574463---
---class bottle ap 0.12087457706866193---
---class bus ap 0.5570994046584221---
---class car ap 0.6143656577987127---
---class cat ap 0.7194043628526606---
---class chair ap 0.26393780384896787---
---class cow ap 0.41833563908169347---
---class diningtable ap 0.30965205324929207---
---class dog ap 0.6497362522756969---
---class horse ap 0.6087018183208217---
---class motorbike ap 0.5435177324015106---
---class person ap 0.5066174329008037---
---class pottedplant ap 0.20824324822971532---
---class sheep ap 0.3947423759878921---
---class sofa ap 0.40140095072614057---
---class train ap 0.6608093115107274---
---class tvmonitor ap 0.4112354317543641---
---map 0.460592393199818---
Loading saved network from ./checkpoints/detector_epoch_50.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [05:55<00:00, 13.91it/s]


---class aeroplane ap 0.5413454681322927---
---class bicycle ap 0.6559564251561015---
---class bird ap 0.5529299016147285---
---class boat ap 0.28314384028879114---
---class bottle ap 0.15366993969166245---
---class bus ap 0.5403833533424375---
---class car ap 0.6465623014892963---
---class cat ap 0.7297864848466433---
---class chair ap 0.3143417494841434---
---class cow ap 0.420306845352303---
---class diningtable ap 0.34389640346810363---
---class dog ap 0.6576663921892423---
---class horse ap 0.633145481059038---
---class motorbike ap 0.591707818388673---
---class person ap 0.5335828728208057---
---class pottedplant ap 0.20281587655024708---
---class sheep ap 0.403659252232655---
---class sofa ap 0.4500562650305758---
---class train ap 0.65854966570851---
---class tvmonitor ap 0.4420559458193249---
---map 0.4877781141332787---
Loading saved network from ./checkpoints/detector.pth
---Evaluate model on test samples---


100%|██████████| 4950/4950 [05:54<00:00, 13.97it/s]


---class aeroplane ap 0.5413454681322927---
---class bicycle ap 0.6559564251561015---
---class bird ap 0.5529299016147285---
---class boat ap 0.28314384028879114---
---class bottle ap 0.15366993969166245---
---class bus ap 0.5403833533424375---
---class car ap 0.6465623014892963---
---class cat ap 0.7297864848466433---
---class chair ap 0.3143417494841434---
---class cow ap 0.420306845352303---
---class diningtable ap 0.34389640346810363---
---class dog ap 0.6576663921892423---
---class horse ap 0.633145481059038---
---class motorbike ap 0.591707818388673---
---class person ap 0.5335828728208057---
---class pottedplant ap 0.20281587655024708---
---class sheep ap 0.403659252232655---
---class sofa ap 0.4500562650305758---
---class train ap 0.65854966570851---
---class tvmonitor ap 0.4420559458193249---
---map 0.4877781141332787---


In [None]:
# Pass the test APs from the evaluation cell to the submission helper
# (presumably it writes them to 'my_solution.csv' — see kaggle_submission.py).
output_submission_csv('my_solution.csv', test_aps)

In [None]:
import torch
import json
import os


# Make sure the output directory used by save_best_model exists.
os.makedirs("checkpoints", exist_ok=True)


# Module-level state for save_best_model: lowest test loss seen so far and the
# list of records written to checkpoints/map_history.json.
best_test_loss = float("inf")
map_history = []

def save_best_model(model, epoch, test_loss, current_map):
    """Save `model` when `test_loss` improves on the best loss seen so far.

    On improvement this updates the module-level `best_test_loss`, writes the
    state dict to ``checkpoints/best_model_epoch_{epoch}.pth``, appends a record
    to `map_history`, rewrites ``checkpoints/map_history.json`` and appends a
    line to ``checkpoints/training_log.txt``. When the loss does not improve,
    nothing is written.

    Args:
        model: object exposing ``state_dict()`` (passed to ``torch.save``).
        epoch: epoch identifier used in the file name and the log records.
        test_loss: loss for this epoch; any value convertible with ``float()``.
        current_map: mAP for this epoch; any value convertible with ``float()``.

    Raises:
        TypeError / ValueError: if `test_loss` or `current_map` cannot be
            converted to ``float``.
    """
    global best_test_loss
    # Coerce to built-in floats up front: json.dump cannot serialise 0-dim torch
    # tensors or numpy float32 scalars, and storing a tensor in best_test_loss
    # would keep it alive across epochs.
    test_loss = float(test_loss)
    current_map = float(current_map)

    if test_loss < best_test_loss:
        best_test_loss = test_loss

        torch.save(model.state_dict(), f"checkpoints/best_model_epoch_{epoch}.pth")
        print(f"Best model saved at epoch {epoch} with loss {test_loss:.5f}")

        map_history.append({
            "epoch": epoch,
            "map": current_map,
            "loss": test_loss
        })
        # The JSON file is rewritten in full each time; the text log is append-only.
        with open("checkpoints/map_history.json", "w") as f:
            json.dump(map_history, f, indent=2)

        with open("checkpoints/training_log.txt", "a") as f:
            f.write(f"Epoch {epoch} - loss: {test_loss:.5f}, mAP: {current_map:.5f}\n")


In [None]:

# Archive the checkpoints directory (recursively) into model_backup.zip.
!zip -r model_backup.zip checkpoints
from google.colab import files
# Hand the archive to Colab's file-download helper.
files.download("model_backup.zip")


updating: checkpoints/ (stored 0%)
updating: checkpoints/detector_epoch_50.pth