In [5]:
import warnings
# Install a blanket 'ignore' filter: no message, category or module is given,
# so every Python warning raised after this cell is suppressed.
warnings.filterwarnings(action='ignore') 

In [6]:
import time

import pandas as pd
import torch
import torch.nn as nn
import torch.nn.init as init
import torch.optim as optim
import torch.utils.data as data

import gc


In [7]:
# Use the first CUDA GPU when one is visible to PyTorch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


### ssd_model
지금까지 구현한 모든 class를 ssd_model.py에 저장 후 load해 사용

In [8]:
from ssd_model import makeDatapathList, VOCDataset, dataTransform, anno_xml2list, od_collate_fn

# Root of the VOC2012 devkit, relative to the notebook.
root_path = "./data/VOCdevkit/VOC2012/"

# makeDatapathList(root_path) gives back a callable; calling it with a split
# name yields the (image list, annotation list) pair for that split.
datapath_list = makeDatapathList(root_path)
train_img_list, train_anno_list = datapath_list('train')
val_img_list, val_anno_list = datapath_list('val')

# Class names handed to anno_xml2list.
voc_classes = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
color_mean = (104, 117, 123)  # presumably per-channel mean colour; channel order not visible here - confirm in ssd_model
input_size = 300

# Each dataset gets its own dataTransform / anno_xml2list instance.
train_dataset = VOCDataset(
    train_img_list,
    train_anno_list,
    phase="train",
    transform=dataTransform(input_size, color_mean),
    transform_anno=anno_xml2list(voc_classes),
)
val_dataset = VOCDataset(
    val_img_list,
    val_anno_list,
    phase="val",
    transform=dataTransform(input_size, color_mean),
    transform_anno=anno_xml2list(voc_classes),
)

batch_size = 32

# Only the training loader shuffles; both use od_collate_fn to build batches.
train_dataloader = data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=od_collate_fn,
)
val_dataloader = data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=od_collate_fn,
)
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}


VGG 이외의 모듈은 초기값으로 He 초기화를 사용
He 초기화: activation function이 ReLU인 경우에 사용
- Kaiming He가 처음 제안했기 때문에 함수명이 kaiming_normal_


In [9]:
from ssd_model import SSD

# Configuration passed to the SSD constructor.
ssd_cfg = dict(
    num_classes=21,  # includes the background class
    input_size=300,
    bbox_aspect_num=[4, 6, 6, 6, 4, 4],  # DBox aspect-ratio variants per source
    feature_maps=[38, 19, 10, 5, 3, 1],  # feature-map size of each source
    steps=[8, 16, 32, 64, 100, 300],
    min_sizes=[30, 60, 111, 162, 213, 264],  # minimum DBox size
    max_sizes=[60, 111, 162, 213, 264, 315],
    aspect_ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]],
)

net = SSD(phase="train", cfg=ssd_cfg)

# Load the VGG weights from disk into the net.vgg sub-module.
# NOTE(review): torch.load unpickles the file - only use checkpoints from a trusted source.
vgg_weights = torch.load('./weights/vgg16_reducedfc.pth')
net.vgg.load_state_dict(vgg_weights)

# He initialisation for the parts of the network other than VGG.
def weights_init(m):
    """Re-initialise a single module in place; meant to be passed to ``Module.apply``.

    ``nn.Conv2d`` modules get Kaiming-normal (He) weights and, when a bias
    exists, a bias filled with zeros. Any other module type is left untouched.
    """
    if not isinstance(m, nn.Conv2d):
        return
    init.kaiming_normal_(m.weight.data)
    if m.bias is None:
        return
    init.constant_(m.bias, 0.0)

# Run weights_init over net.extras, net.loc and net.conf; net.vgg is not touched here.
# (nn.Module.apply calls the function on every sub-module and on the module itself.)
net.extras.apply(weights_init)
net.loc.apply(weights_init)
# Last expression of the cell, so its return value is what the notebook displays.
net.conf.apply(weights_init)

ModuleList(
  (0): Conv2d(512, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): Conv2d(1024, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (2): Conv2d(512, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): Conv2d(256, 126, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (4): Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): Conv2d(256, 84, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
)

In [10]:
from ssd_model import MultiBoxLoss

# Loss function: MultiBoxLoss configured with the settings below.
criterion = MultiBoxLoss(
    jaccard_thresh=0.5,
    neg_pos=3,
    device=device,
)

# Optimiser: SGD with momentum and weight decay over every parameter of net.
optimizer = optim.SGD(
    net.parameters(),
    lr=1e-3,
    momentum=0.9,
    weight_decay=5e-4,
)

In [11]:
def train_model(net,dataloaders_dict,criterion,optimizer,num_epochs):
    """Train ``net`` for exactly ``num_epochs`` epochs, validating every 10th epoch.

    Args:
        net: model to train; moved to the selected device in place.
        dataloaders_dict: mapping with ``"train"`` and ``"val"`` entries, each an
            iterable of ``(images, targets)`` batches where ``targets`` is a
            sequence of tensors.
        criterion: callable ``criterion(outputs, targets)`` returning two loss
            tensors, which are added together to form the optimised loss.
        optimizer: optimiser bound to ``net``'s parameters.
        num_epochs: number of epochs to run.

    Side effects:
        * Rewrites ``log_output.csv`` after every epoch. Losses are the *sum* of
          the per-batch losses for that epoch; ``val_loss`` is ``0.0`` for
          epochs in which validation is skipped.
        * Saves ``weights/ssd300_<epoch>.pth`` every 10th epoch, creating the
          ``weights`` directory if it does not exist.
    """
    import os  # local import: only needed to create the checkpoint directory

    device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    net.to(device)

    torch.backends.cudnn.benchmark=True

    iteration=1
    epoch_train_loss=0.0
    epoch_val_loss=0.0
    logs=[]

    # range(num_epochs), not num_epochs+1: the latter ran one extra epoch
    # and printed e.g. "Epoch 51/50".
    for epoch in range(num_epochs):
        gc.collect()
        torch.cuda.empty_cache()
        t_epoch_start=time.time()
        t_iter_start=time.time()

        print(f"Epoch {epoch+1}/{num_epochs}")

        for phase in ['train','val']:
            if phase=="train":
                net.train()
                print("[train]")
            else:
                # Validation only runs on every 10th epoch.
                if (epoch+1)%10==0:
                    net.eval()
                    print("[val]")
                else:
                    continue

            for images,targets in dataloaders_dict[phase]:
                images=images.to(device)
                targets=[target.to(device) for target in targets]

                optimizer.zero_grad()

                # Autograd is enabled only while training.
                with torch.set_grad_enabled(phase=='train'):
                    outputs=net(images)
                    loss_l,loss_c=criterion(outputs,targets)
                    loss=loss_l+loss_c
                    if phase=="train":
                        loss.backward()

                        # Clamp each gradient element to [-2, 2] before the update.
                        nn.utils.clip_grad_value_(net.parameters(),clip_value=2.0)

                        optimizer.step()

                        # Progress line every 10 training iterations.
                        if iteration%10==0:
                            t_iter_finish=time.time()
                            print("[%s] loss:%.4f || sec:%.4f"%(iteration,loss.item(),t_iter_finish-t_iter_start))
                            t_iter_start=time.time()

                        epoch_train_loss+=loss.item()
                        iteration+=1
                    else:
                        epoch_val_loss+=loss.item()
        t_epoch_finish=time.time()
        print("[%s] train_loss: %.4f || val_loss: %.4f || sec:%.4f"%(epoch+1,epoch_train_loss,epoch_val_loss,t_epoch_finish-t_epoch_start))

        # Rewrite the whole log each epoch so the CSV is usable mid-run.
        log_epoch={'epoch':epoch+1,'train_loss':epoch_train_loss,'val_loss':epoch_val_loss}
        logs.append(log_epoch)
        df=pd.DataFrame(logs)
        df.to_csv("log_output.csv")

        epoch_train_loss=0.0
        epoch_val_loss=0.0
        if (epoch+1)%10==0:
            # Make sure the target directory exists so the checkpoint save
            # cannot fail after hours of training.
            os.makedirs('weights',exist_ok=True)
            torch.save(net.state_dict(),f'weights/ssd300_{str(epoch+1)}.pth')

In [12]:
# Number of epochs requested for this training run.
num_epochs = 50
train_model(
    net=net,
    dataloaders_dict=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

cuda:0
Epoch 1/50
[train]
[10] loss:18.8551 || sec:10.7139
[20] loss:15.1328 || sec:6.8665
[30] loss:10.7758 || sec:6.7172
[40] loss:9.3380 || sec:6.4660
[50] loss:9.1531 || sec:6.8116
[60] loss:8.8223 || sec:6.4990
[70] loss:9.3177 || sec:6.7359
[80] loss:9.5099 || sec:6.3402
[90] loss:8.6765 || sec:7.1307
[100] loss:10.1284 || sec:6.7207
[110] loss:9.2552 || sec:7.0567
[120] loss:8.8562 || sec:6.7554
[130] loss:8.9482 || sec:6.9028
[140] loss:9.6521 || sec:6.6987
[150] loss:9.0618 || sec:6.7066
[160] loss:9.7839 || sec:6.7284
[170] loss:8.8910 || sec:6.5956
[1] train_loss: 1915.0481 || val_loss: 0.0000 || sec:128.7687
Epoch 2/50
[train]
[180] loss:9.4705 || sec:0.5080
[190] loss:8.2160 || sec:6.0889
[200] loss:8.8106 || sec:6.4273
[210] loss:9.9323 || sec:6.2250
[220] loss:11.0503 || sec:6.4944
[230] loss:10.6639 || sec:6.5156
[240] loss:8.4303 || sec:6.2401
[250] loss:8.4952 || sec:6.2426
[260] loss:9.8443 || sec:6.1211
[270] loss:9.4811 || sec:6.3372
[280] loss:9.4861 || sec:6.5906