In [17]:
#Import Library
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict                                            
import os

import time

import imgaug as ia
from imgaug import augmenters as iaa                                              # image augmentation 

import cv2
from PIL import Image
from torchvision import transforms

import xml.etree.ElementTree as ET                                                   # xml 읽어 오는 라이브러리 

from xml.etree.ElementTree import Element, ElementTree

In [18]:
# Define the XML annotation parsing function
def xml_parser(xml_path):
    """Parse one Pascal-VOC style XML annotation file.

    Args:
        xml_path: Path of the XML annotation file to read.

    Returns:
        A tuple ``(file_name, object_name, bbox)``:
        ``file_name`` is the text of the root ``<filename>`` element,
        ``object_name`` is the list of ``<name>`` texts (one per ``<object>``),
        and ``bbox`` is a list of ``[xmin, ymin, xmax, ymax]`` integer lists in
        the same order as ``object_name``.

    Raises:
        AttributeError: If ``<filename>``, ``<name>``, ``<bndbox>`` or one of
            the four coordinate elements is missing.
    """
    # The context manager guarantees the handle is closed even when parsing
    # fails; the dataset calls this once per sample, so leaked handles add up.
    with open(xml_path, "r") as xml_file:
        root = ET.parse(xml_file).getroot()

    file_name = root.find('filename').text
    object_name = []
    bbox = []
    for _object in root.findall('object'):
        object_name.append(_object.find('name').text)
        bndbox = _object.find('bndbox')
        # Coordinates may be written as floats ("12.0"), so go through float()
        # before truncating to int.
        bbox.append([
            int(float(bndbox.find(tag).text))
            for tag in ("xmin", "ymin", "xmax", "ymax")
        ])
    return file_name, object_name, bbox

In [19]:
# Function that draws the labelled bounding boxes on an image
def makeBox(voc_im,bbox,objects):
    """Return a copy of ``voc_im`` with one rectangle and caption per object.

    Args:
        voc_im: Image to annotate; only its ``copy()`` is drawn on.
        bbox: Indexable of boxes; entry ``i`` supplies the two corner points
            as ``[x0, y0, x1, y1]`` for ``objects[i]``.
        objects: Caption strings, one per box.

    Returns:
        The annotated copy of the image.
    """
    box_colour = (0,255,0)
    canvas = voc_im.copy()
    for idx, caption in enumerate(objects):
        left, top, right, bottom = (int(bbox[idx][k]) for k in range(4))
        cv2.rectangle(canvas, (left, top), (right, bottom), color = box_colour, thickness = 1)
        # The caption is anchored 10 px above the box's first corner.
        cv2.putText(canvas, caption, (left, top-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, box_colour, 2)
    return canvas

In [20]:
# Build the label -> integer id mapping from every annotation file
XML_DIR = "/home/sogaksa123/AIFFEL_THON/data/dataset5_0403/with_normal_224/xml_files/"

# Keep only *.xml entries: os.listdir also returns unrelated entries (for
# example Jupyter's ".ipynb_checkpoints" folder) that would crash xml_parser.
# Sorting by file name keeps the sample order deterministic.
xml_list = sorted(
    entry for entry in os.listdir(XML_DIR) if entry.lower().endswith(".xml")
)

# Collect every distinct object name that appears in the annotations.
label_set = set()
for xml_name in xml_list:
    file_name, object_name, bbox = xml_parser(os.path.join(XML_DIR, xml_name))
    label_set.update(object_name)

label_set = sorted(label_set)

# Ids start at 1 (three labels -> 1, 2, 3); torchvision detection models
# treat class 0 as background.
label_dic = {key: i + 1 for i, key in enumerate(label_set)}

print(label_dic)

{'Normal': 1, 'crackles': 2, 'wheezes': 3}


In [21]:
# Build the dataset class used for training
class Pascal_Vo(Dataset):
    """Detection dataset backed by Pascal-VOC style XML annotation files.

    Each item is an image tensor plus a one-element list holding a dict with
    ``'boxes'`` and ``'labels'`` tensors. Images go through imgaug's
    ``Fliplr(0.5)`` and then a resize of the shorter side to 600 px (aspect
    ratio kept); the boxes are transformed together with the image.
    """

    # Default locations of the annotation and image files.
    XML_DIR = '/home/sogaksa123/AIFFEL_THON/data/dataset5_0403/with_normal_224/xml_files/'
    IMG_DIR = '/home/sogaksa123/AIFFEL_THON/data/dataset5_0403/with_normal_224/png_files'

    def __init__(self, xml_list, len_data, xml_dir=None, img_dir=None):
        """Store the sample list and build the augmenters.

        Args:
            xml_list: Annotation file names, relative to ``xml_dir``.
            len_data: Value reported by ``len(dataset)``.
            xml_dir: Directory holding the XML files (defaults to ``XML_DIR``).
            img_dir: Directory holding the images (defaults to ``IMG_DIR``).
        """
        self.xml_list = xml_list
        self.len_data = len_data                      # dataset length
        self.xml_dir = self.XML_DIR if xml_dir is None else xml_dir
        self.img_dir = self.IMG_DIR if img_dir is None else img_dir
        self.to_tensor = transforms.ToTensor()
        self.flip = iaa.Fliplr(0.5)                   # flip augmentation
        self.resize = iaa.Resize({'shorter-side': 600, "longer-side":"keep-aspect-ratio"})   # resize augmentation

    def __len__(self):
        """Return the dataset length given at construction time."""
        return self.len_data

    def _image_path(self, file_name):
        """Join ``img_dir`` and the annotation's ``<filename>`` text.

        Leading separators are stripped so that both ``"a.png"`` and
        ``"/a.png"`` resolve inside ``img_dir``. Plain string concatenation
        dropped the separator in the first case.
        """
        return os.path.join(self.img_dir, str(file_name).lstrip('/\\'))

    def __getitem__(self, idx):
        """Load, augment and tensorise sample ``idx``.

        Returns:
            ``(image, targets)`` where ``targets`` is ``[{'boxes': ..., 'labels': ...}]``.
        """
        # Index the list owned by this instance, not the notebook-level global.
        xml_path = os.path.join(self.xml_dir, str(self.xml_list[idx]))

        file_name, object_name, bbox = xml_parser(xml_path)
        # Close the image file as soon as the pixel data has been copied out.
        with Image.open(self._image_path(file_name)) as img:
            image = np.array(img.convert('RGB'))

        # Boxes are passed with a leading axis of size 1, removed again below.
        image, bbox = self.flip(image = image, bounding_boxes = np.array([bbox]))
        image, bbox = self.resize(image=image, bounding_boxes = bbox)
        bbox = bbox.squeeze(0).tolist()
        image= self.to_tensor(image)

        d = {
            # torchvision detection models expect floating-point boxes.
            'boxes': torch.tensor(bbox, dtype=torch.float32),
            'labels': torch.tensor([label_dic[x] for x in object_name], dtype=torch.int64),
        }
        return image, [d]

In [25]:
# Faster R-CNN model definition
backbone = torchvision.models.vgg16(pretrained=True).features[:-1]    # pretrained VGG16 feature extractor without its last layer
backbone_out = 512              # number of channels produced by the backbone
backbone.out_channels = backbone_out   # FasterRCNN reads this attribute from the backbone

# Anchor generator for the RPN: 3 sizes x 3 aspect ratios on one feature map
anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(sizes=((128,256,512),),aspect_ratios=((0.5,1.0,2.0),))

resolution = 7
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],output_size=resolution,sampling_ratio=2)    # RoI pooling definition

# Box head (two fully connected layers) followed by the predictor
box_head = torchvision.models.detection.faster_rcnn.TwoMLPHead(in_channels = backbone_out*(resolution**2),representation_size=4096)
box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(4096,4) # number of classes

# Model hyper-parameters: if the backbone output size changes, the dependent
# values above must be changed with it.
# num_classes stays None because an explicit box_predictor is supplied.
model = torchvision.models.detection.FasterRCNN(backbone, num_classes=None,
                        min_size= 600, max_size = 1000,
                        rpn_anchor_generator= anchor_generator,
                        rpn_pre_nms_top_n_train=6000, rpn_pre_nms_top_n_test=6000,
                        rpn_post_nms_top_n_train= 2000, rpn_post_nms_top_n_test=300,
                        rpn_nms_thresh=0.7, rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                        rpn_batch_size_per_image= 256, rpn_positive_fraction=0.5,
                        box_roi_pool=roi_pooler, box_head= box_head, box_predictor = box_predictor,
                        box_score_thresh=0.05, box_nms_thresh=0.7, box_detections_per_img=300,
                        # The keyword is `box_bg_iou_thresh`; the former spelling
                        # `box_be_iou_thresh` is not a FasterRCNN parameter.
                        box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                        box_batch_size_per_image=128, box_positive_fraction=0.25
                    )

# Draw every RPN parameter (biases included) from N(0, 0.01).
for param in model.rpn.parameters():
    torch.nn.init.normal_(param, mean=0.0, std=0.01)

# RoI heads: bbox_pred parameters use std 0.001, other weights std 0.01,
# and every bias is finally reset to zero.
for name, param in model.roi_heads.named_parameters():
    if "bbox_pred" in name:
        torch.nn.init.normal_(param, mean=0.0, std=0.001)
    elif "weight" in name:
        torch.nn.init.normal_(param, mean =0.0, std=0.01)
    if "bias" in name:
        torch.nn.init.zeros_(param)

In [26]:
# Define the loss function
def Total_loss(loss, rpn_box_weight=10, roi_box_weight=1):
    """Combine the four Faster R-CNN loss terms into a single value.

    Args:
        loss: Mapping with the keys ``'loss_objectness'``,
            ``'loss_rpn_box_reg'``, ``'loss_classifier'`` and
            ``'loss_box_reg'``.
        rpn_box_weight: Multiplier (lambda) applied to the RPN box-regression
            term. Defaults to 10, the value previously hard-coded.
        roi_box_weight: Multiplier applied to the RoI-head box-regression
            term. Defaults to 1, the value previously hard-coded.

    Returns:
        ``(objectness + rpn_box_weight * rpn_box_reg)
        + (classifier + roi_box_weight * box_reg)``.

    Raises:
        KeyError: If one of the four keys is missing from ``loss``.
    """
    rpn_total = loss['loss_objectness'] + rpn_box_weight * loss['loss_rpn_box_reg']
    fast_rcnn_total = loss['loss_classifier'] + roi_box_weight * loss['loss_box_reg']
    return rpn_total + fast_rcnn_total

In [None]:
# Train
total_epoch = 50

len_data = 10  # number of images per epoch

loss_sum = 0   # running sum of the per-iteration losses (plain Python float)

optimizer = torch.optim.SGD(params = model.parameters(), lr=0.001, momentum=0.9, weight_decay = 0.0005) # SGD optimizer
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, total_epoch,eta_min=0.00001)  # cosine learning-rate schedule

start_epoch = 0
start_idx = 0

print("start_epoch = {} , start_idx = {}".format(start_epoch,start_idx))

print("Training Start")
model.train()       # training mode: the model returns a dict of losses
start = time.time()

for epoch in range(start_epoch, total_epoch):

    # The dataset is shortened by start_idx; start_idx is reset to 0 after
    # the first pass through this loop.
    dataset = Pascal_Vo(xml_list[:len_data], len_data - start_idx)
    dataloader = DataLoader(dataset, shuffle=True)

    for i, (image, targets) in enumerate(dataloader, start_idx):

        optimizer.zero_grad()

        # Drop the leading batch axis from the collated targets.
        targets[0]['boxes'].squeeze_(0)
        targets[0]['labels'].squeeze_(0)

        loss = model(image,targets)
        total_loss = Total_loss(loss)
        # Accumulate the Python float, not the tensor: adding the tensor keeps
        # every iteration's autograd graph reachable and memory grows.
        loss_sum += total_loss.item()

        total_loss.backward()
        optimizer.step()

    start_idx = 0
    scheduler.step()

    # NOTE(review): `state` is assembled here but nothing in this cell writes
    # it to disk.
    state ={
        'epoch' : epoch,
        'iter' :i+1,
        'state_dict' :model.state_dict(),
        'optimizer':optimizer.state_dict(),
        'scheduler' : scheduler.state_dict()
    }
    print('epoch:' + str(epoch))
    print('loss:' + str(total_loss))   # loss of the last batch of the epoch

start_epoch = 0 , start_idx = 0
Training Start
epoch:0
loss:tensor(3.9004, grad_fn=<AddBackward0>)
epoch:1
loss:tensor(3.9662, grad_fn=<AddBackward0>)
epoch:2
loss:tensor(3.0169, grad_fn=<AddBackward0>)
epoch:3
loss:tensor(2.8566, grad_fn=<AddBackward0>)
epoch:4
loss:tensor(3.5908, grad_fn=<AddBackward0>)
epoch:5
loss:tensor(2.6238, grad_fn=<AddBackward0>)
epoch:6
loss:tensor(3.5383, grad_fn=<AddBackward0>)
epoch:7
loss:tensor(3.3833, grad_fn=<AddBackward0>)
epoch:8
loss:tensor(2.3097, grad_fn=<AddBackward0>)
epoch:9
loss:tensor(2.9619, grad_fn=<AddBackward0>)
epoch:10
loss:tensor(2.3994, grad_fn=<AddBackward0>)
epoch:11
loss:tensor(2.7995, grad_fn=<AddBackward0>)
epoch:12
loss:tensor(2.4227, grad_fn=<AddBackward0>)
epoch:13
loss:tensor(2.7964, grad_fn=<AddBackward0>)
epoch:14
loss:tensor(3.8370, grad_fn=<AddBackward0>)
epoch:15
loss:tensor(3.3450, grad_fn=<AddBackward0>)
epoch:16
loss:tensor(3.5231, grad_fn=<AddBackward0>)
epoch:17
loss:tensor(2.4979, grad_fn=<AddBackward0>)
epoch:18
