In [1]:
import torch
import glob
import cv2
import os
import numpy as np
import random
import time
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
from xml.dom.minidom import parse
from torchvision.models.detection.faster_rcnn import fasterrcnn_resnet50_fpn

In [2]:
# Enumerate CUDA devices in PCI bus order and expose only device index 2 to
# this process.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

In [3]:
# Detection classes. The position in this list is the integer class id used
# for annotation labels; slot 0 is taken by "__background__".
idx2label = [
    "__background__",
    "person",
    "bicycle",
    "car",
    "motorbike",
    "bus",
]
# Inverse lookup: class name -> integer class id.
label2idx = dict(zip(idx2label, range(len(idx2label))))

In [4]:
def get_image_data(img_name_list, data_dir = "./data/Image/"):
    """Load the PNG image for every name in ``img_name_list``.

    Args:
        img_name_list: Iterable of image base names (without extension).
        data_dir: Directory that holds the ``<name>.png`` files.

    Returns:
        dict mapping each base name to a float tensor holding the array
        returned by ``cv2.imread`` permuted with ``(2, 0, 1)`` (channel axis
        first). Names whose file could not be read are left out of the dict,
        so callers can detect them with a membership test.
    """
    image_data = dict()
    for image_file in img_name_list:
        image_filename = os.path.join(data_dir, image_file + ".png")
        pixels = cv2.imread(image_filename)
        if pixels is None:
            # cv2.imread reports a missing or undecodable file by returning
            # None rather than raising; torch.Tensor(None) would then fail
            # with an unrelated TypeError. Skip the entry instead.
            continue
        # NOTE(review): values are kept exactly as loaded (OpenCV channel
        # order, 0-255 scale); torchvision documents detection inputs in the
        # 0-1 range -- confirm whether scaling / BGR->RGB conversion is wanted.
        image_data[image_file] = torch.Tensor(pixels).permute(2,0,1)
    return image_data

In [5]:
def get_annotations_data(annotations_list, data_dir = "./data/Annotations/"):
    """Parse the XML annotation file of every name in ``annotations_list``.

    Every ``<object>`` element contributes one class id (its ``<name>`` text
    looked up in ``label2idx``) and one ``[xmin, ymin, xmax, ymax]`` box read
    from its ``<bndbox>`` element.

    Args:
        annotations_list: Iterable of annotation base names (without ".xml").
        data_dir: Directory that holds the ``<name>.xml`` files.

    Returns:
        dict keyed by the text of the XML ``<filename>`` element up to ".png".
        Each value is ``{"boxes": float tensor of shape (N, 4),
        "labels": long tensor of shape (N,)}`` with N the number of objects.

    Raises:
        KeyError: if an object's class name is not a key of ``label2idx``.
    """
    annotations_data = dict()
    box_tags = ("xmin", "ymin", "xmax", "ymax")
    for annotations in annotations_list:
        domTree = parse(os.path.join(data_dir, annotations+".xml"))
        rootNode = domTree.documentElement
        label_list = []
        boxes_list = []
        for obj in rootNode.getElementsByTagName("object"):
            label_list.append(int(label2idx[obj.getElementsByTagName("name")[0].firstChild.data]))
            # Look the <bndbox> element up once instead of once per coordinate.
            bndbox = obj.getElementsByTagName("bndbox")[0]
            boxes_list.append([
                float(bndbox.getElementsByTagName(tag)[0].firstChild.data)
                for tag in box_tags
            ])
        if boxes_list:
            boxes = torch.Tensor(boxes_list)
        else:
            # torch.Tensor([]) has shape (0,); keep the (N, 4) layout even when
            # the image has no annotated object.
            boxes = torch.zeros((0, 4))
        image_key = rootNode.getElementsByTagName("filename")[0].firstChild.data.split(".png")[0]
        annotations_data[image_key] = {"boxes": boxes, "labels": torch.LongTensor(label_list)}
    return annotations_data

In [6]:
def get_img_name_list(data_dir = "./data/train.txt"):
    """Read image base names from a list file, one name per line.

    Args:
        data_dir: Path of the text file to read (a file path, despite the
            parameter name).

    Returns:
        list of names with surrounding whitespace stripped. Blank lines are
        dropped.
    """
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(data_dir, "r") as f:
        stripped = (line.strip() for line in f)
        # A blank line would otherwise become the empty name "" and be looked
        # up later as ".png" / ".xml".
        return [name for name in stripped if name]

In [7]:
def get_data(data_list_filepath = "./data/train.txt"):
    """Assemble ``[image, annotation]`` pairs for the names listed in a file.

    Args:
        data_list_filepath: Path of the list file handed to
            ``get_img_name_list``.

    Returns:
        list of two-element lists ``[image, annotation]`` in list-file order,
        taken from ``get_image_data`` and ``get_annotations_data`` (both called
        with their default directories).
    """
    names = get_img_name_list(data_list_filepath)
    images = get_image_data(names)
    annotations = get_annotations_data(names)
    # A name absent from either lookup is silently left out.
    return [
        [images[name], annotations[name]]
        for name in names
        if name in images and name in annotations
    ]

In [8]:
def make_batch(all_data, batch_size = 64):
    """Shuffle the samples and group them into mini-batches.

    Args:
        all_data: Sequence of ``[image, target]`` pairs. It is not modified;
            a shuffled copy is batched.
        batch_size: Maximum number of samples per batch (the last batch may be
            smaller).

    Returns:
        list of object-dtype ``numpy`` arrays of shape ``(2, B)``: row 0 holds
        the B images of the batch, row 1 the B matching targets, so a batch
        unpacks as ``images, targets = batch``.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A step of 0 (or a negative one) would never reach the end of the data.
        raise ValueError("batch_size must be a positive integer, got {}".format(batch_size))

    # Shuffle a copy so the caller's list keeps its order.
    shuffled = list(all_data)
    random.shuffle(shuffled)

    batches = []
    for start in range(0, len(shuffled), batch_size):
        chunk = shuffled[start:start + batch_size]
        # Fill a pre-allocated object array cell by cell. Handing the ragged
        # [image, target] pairs to np.array() makes NumPy try to unpack the
        # images, which newer NumPy releases reject with a ValueError.
        batch = np.empty((2, len(chunk)), dtype=object)
        for col, (image, target) in enumerate(chunk):
            batch[0, col] = image
            batch[1, col] = target
        batches.append(batch)

    return batches

In [9]:
def divide_train_test(data_dir="./data", split_rate=0.05):
    """Randomly split the names in ``all.txt`` into ``train.txt`` and ``test.txt``.

    Args:
        data_dir: Directory that contains ``all.txt``; the two output files are
            written (overwritten) in the same directory.
        split_rate: Fraction of the names that goes to ``test.txt``; the
            remaining ``1 - split_rate`` goes to ``train.txt``.

    Returns:
        None. The result is the two files on disk, one name per line.
    """
    with open(os.path.join(data_dir, "all.txt"), "r") as f:
        # Blank lines are not image names; dropping them keeps empty entries
        # out of both output files.
        img_name_list = [name for name in (line.strip() for line in f) if name]

    random.shuffle(img_name_list)

    # Index of the first test sample, computed once for both slices.
    split_at = int(len(img_name_list) * (1 - split_rate))
    img_train_list = img_name_list[:split_at]
    img_test_list = img_name_list[split_at:]

    # The ``with`` blocks close the files; no explicit close() is needed.
    with open(os.path.join(data_dir, "train.txt"), "w") as f:
        f.writelines(item + "\n" for item in img_train_list)

    with open(os.path.join(data_dir, "test.txt"), "w") as f:
        f.writelines(item + "\n" for item in img_test_list)

In [10]:
def writelog(img_name, boxes, labels, scores, data_dir = "./data/pt/", threshold = 0):
    """Write the detections of one image to ``<data_dir>/<img_name>.txt``.

    One line ``label xmin ymin xmax ymax`` is written for every detection
    whose score is strictly greater than ``threshold``.

    Args:
        img_name: Base name of the output file (".txt" is appended).
        boxes: Indexable of 4-element boxes, aligned with ``scores``.
        labels: Indexable of labels, aligned with ``scores``.
        scores: Sequence of confidence scores; its length drives the loop.
        data_dir: Output directory.
        threshold: Detections scoring at or below this value are skipped.
    """
    out_path = os.path.join(data_dir, img_name + ".txt")
    with open(out_path, "w") as f:
        for i, score in enumerate(scores):
            if not score > threshold:
                continue
            label = labels[i]
            box = boxes[i]
            f.write("{} {} {} {} {}\n".format(label, box[0], box[1], box[2], box[3]))

In [11]:
def compute_IOU(rec1,rec2):
    """Intersection-over-union of two axis-aligned rectangles.

    Args:
        rec1: Rectangle as ``[xmin, ymin, xmax, ymax]``.
        rec2: Rectangle as ``[xmin, ymin, xmax, ymax]``.

    Returns:
        ``intersection area / union area``, or ``0.0`` when the union area is
        not positive (e.g. both rectangles are degenerate), which would
        otherwise be a division by zero.
    """
    # Overlap extent along each axis, clamped at 0 for disjoint rectangles.
    overlap_w = max(0, min(rec1[2], rec2[2]) - max(rec1[0], rec2[0]))
    overlap_h = max(0, min(rec1[3], rec2[3]) - max(rec1[1], rec2[1]))
    inter = overlap_w * overlap_h

    area1 = (rec1[3] - rec1[1]) * (rec1[2] - rec1[0])
    area2 = (rec2[3] - rec2[1]) * (rec2[2] - rec2[0])
    union = area1 + area2 - inter
    if union <= 0:
        return 0.0
    return inter/union

In [12]:
def evaluate(pred_boxes, pred_labels, gt_boxes, gt_labels):
    """Greedily match the predictions of one image against its ground truth.

    Predictions are visited in order. Each one claims the first ground-truth
    box that overlaps it with IoU above 0.5, carries the same label and has
    not been claimed by an earlier prediction.

    Args:
        pred_boxes: Predicted boxes, each ``[xmin, ymin, xmax, ymax]``.
        pred_labels: Predicted labels, aligned with ``pred_boxes``.
        gt_boxes: Ground-truth boxes in the same layout.
        gt_labels: Ground-truth labels, aligned with ``gt_boxes``.

    Returns:
        Tuple ``(num_correct, num_error, num_miss)``: matched predictions,
        unmatched predictions and unmatched ground-truth boxes.
    """
    n_pred, n_gt = len(pred_boxes), len(gt_boxes)
    claimed = [False] * n_gt
    hits = 0
    for p in range(n_pred):
        for g in range(n_gt):
            overlaps = compute_IOU(pred_boxes[p], gt_boxes[g]) > 0.5
            if overlaps and pred_labels[p] == gt_labels[g] and not claimed[g]:
                claimed[g] = True
                hits += 1
                # One ground-truth box per prediction at most.
                break

    return hits, n_pred - hits, n_gt - hits

In [13]:
def evaluate_model_nobatch(img_name_list, image_data, annotations_data):
    """Evaluate the global ``model`` one image at a time and print the scores.

    Each image is moved to the GPU with ``.cuda()``, passed to the model alone,
    and its predictions are moved back to the CPU before being matched against
    the ground truth with ``evaluate``.

    Args:
        img_name_list: Names of the images to evaluate.
        image_data: dict name -> image tensor.
        annotations_data: dict name -> ``{"boxes": ..., "labels": ...}``.
            Names missing from either dict are skipped.

    Returns:
        None. Prints three values labelled ``mAP``, ``mAR`` and ``F-measure``,
        computed as correct / (correct + error), correct / (correct + miss)
        and their harmonic mean. A ratio whose denominator is 0 is reported
        as 0.0.
    """
    model.eval()

    num_correct = 0
    num_error = 0
    num_miss = 0

    for img_name in img_name_list:
        if img_name not in image_data or img_name not in annotations_data:
            continue

        with torch.no_grad():
            pred_data = model([image_data[img_name].cuda()])

        target = annotations_data[img_name]
        tmp_num_correct, tmp_num_error, tmp_num_miss = evaluate(
            pred_data[0]["boxes"].cpu(), pred_data[0]["labels"].cpu(),
            target["boxes"], target["labels"])
        num_correct = num_correct + tmp_num_correct
        num_error = num_error + tmp_num_error
        num_miss = num_miss + tmp_num_miss

    # Guard every ratio: with no detections, no ground truth, or no match at
    # all, the plain division raises ZeroDivisionError.
    num_pred = num_correct + num_error
    num_gt = num_correct + num_miss
    mAP = num_correct/num_pred if num_pred else 0.0
    mAR = num_correct/num_gt if num_gt else 0.0
    F_measure = 2*mAP*mAR/(mAP+mAR) if (mAP+mAR) else 0.0
    print('mAP={}\n mAR={}\n F-measure={}'.format(mAP,mAR,F_measure))

In [14]:
def evaluate_all(pred_boxes, pred_labels, gt_boxes, gt_labels):
    """Accumulate ``evaluate`` counts over several images and print the scores.

    Args:
        pred_boxes: Per-image sequence of predicted boxes.
        pred_labels: Per-image sequence of predicted labels.
        gt_boxes: Per-image sequence of ground-truth boxes.
        gt_labels: Per-image sequence of ground-truth labels.
            All four are indexed by the same image position; the number of
            images is taken from ``pred_boxes``.

    Returns:
        None. Prints three values labelled ``mAP``, ``mAR`` and ``F-measure``,
        computed as correct / (correct + error), correct / (correct + miss)
        and their harmonic mean. A ratio whose denominator is 0 is reported
        as 0.0.
    """
    num_correct = 0
    num_error = 0
    num_miss = 0

    for i, boxes in enumerate(pred_boxes):
        tmp_num_correct, tmp_num_error, tmp_num_miss = evaluate(boxes, pred_labels[i], gt_boxes[i], gt_labels[i])
        num_correct = num_correct + tmp_num_correct
        num_error = num_error + tmp_num_error
        num_miss = num_miss + tmp_num_miss

    # Guard every ratio: with no detections, no ground truth, or no match at
    # all, the plain division raises ZeroDivisionError.
    num_pred = num_correct + num_error
    num_gt = num_correct + num_miss
    mAP = num_correct/num_pred if num_pred else 0.0
    mAR = num_correct/num_gt if num_gt else 0.0
    F_measure = 2*mAP*mAR/(mAP+mAR) if (mAP+mAR) else 0.0
    print('mAP={}\n mAR={}\n F-measure={}'.format(mAP,mAR,F_measure))

In [15]:
def evaluate_model(img_name_list, image_data, annotations_data):
    """Evaluate the global ``model`` on all listed images in one forward pass.

    Args:
        img_name_list: Names of the images to evaluate.
        image_data: dict name -> image tensor.
        annotations_data: dict name -> ``{"boxes": ..., "labels": ...}``.
            Names missing from either dict are skipped.

    Returns:
        None. The scores are printed by ``evaluate_all``.
    """
    model.eval()

    # Feed the images from the device that holds the model's weights; passing
    # CPU tensors to a model that was moved to the GPU fails with a device
    # mismatch.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    pred_img = []
    gt_data = []
    for img_name in img_name_list:
        if img_name not in image_data or img_name not in annotations_data:
            continue
        pred_img.append(image_data[img_name].to(device))
        gt_data.append(annotations_data[img_name])

    # NOTE(review): every selected image goes through the model in a single
    # call; evaluate_model_nobatch processes them one by one instead.
    with torch.no_grad():
        pred_data = model(pred_img)

    # Bring predictions back to the CPU so they can be compared with the
    # ground-truth tensors held in annotations_data.
    pred_eval_boxes = [item["boxes"].cpu() for item in pred_data]
    pred_eval_labels = [item["labels"].cpu() for item in pred_data]
    gt_eval_boxes = [item["boxes"] for item in gt_data]
    gt_eval_labels = [item["labels"] for item in gt_data]

    evaluate_all(pred_eval_boxes, pred_eval_labels, gt_eval_boxes, gt_eval_labels)

In [16]:
def train_one_epoch(model, batch, optimizer):
    """Run one optimisation step on a single batch and return its loss.

    Despite the name, this handles one batch; ``train`` calls it once per
    batch of every epoch.

    Args:
        model: Detection model that returns a dict of loss tensors when called
            with ``(images, targets)`` in training mode.
        batch: Pair ``(images, targets)``; ``targets`` is a sequence of dicts
            of tensors. Every tensor is moved to the GPU with ``.cuda()``.
        optimizer: Optimizer bound to the model's parameters.

    Returns:
        float: sum of all values of the model's loss dict for this batch.
    """
    model.train()

    batch_images, batch_targets = batch
    gpu_images = [img.cuda() for img in batch_images]
    gpu_targets = []
    for target in batch_targets:
        gpu_targets.append({key: value.cuda() for key, value in target.items()})

    # The model returns one tensor per loss term; optimise their plain sum.
    total_loss = sum(model(gpu_images, gpu_targets).values())

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()
    return total_loss.item()

In [17]:
def train(model, epochs, batches, optimizer, scheduler, save_path = "./best_model_all.pt"):
    """Train for ``epochs`` passes over ``batches`` and checkpoint the best epoch.

    After every epoch the mean batch loss is handed to ``scheduler.step`` and
    printed together with the elapsed time. Whenever that mean is lower than
    any seen so far, the whole model object is saved with ``torch.save``.

    Args:
        model: Model to train; passed to ``train_one_epoch`` for every batch.
        epochs: Number of passes over ``batches``.
        batches: Sized iterable of batches, reused in the same order each epoch.
        optimizer: Optimizer passed to ``train_one_epoch``.
        scheduler: Object whose ``step(mean_loss)`` is called once per epoch.
        save_path: Where the best model is written. Defaults to the path that
            was previously hard-coded.

    Raises:
        ValueError: if an epoch is started with an empty ``batches`` (the mean
            loss would be a division by zero).
    """
    minn_losses = np.inf

    for epoch in range(epochs):
        num_batches = len(batches)
        if num_batches == 0:
            raise ValueError("batches is empty; nothing to train on")

        start_time = time.time()
        print("epoch:", epoch + 1)
        losses = 0

        for batch in batches:
            losses += train_one_epoch(model, batch, optimizer)

        cur_losses = losses/num_batches
        scheduler.step(cur_losses)
        print("time cost:", time.time() - start_time)
        print("losses:", cur_losses)

        # Keep only the checkpoint with the lowest mean training loss so far.
        if cur_losses < minn_losses:
            torch.save(model, save_path)
            minn_losses = cur_losses

In [18]:
# Rewrite ./data/train.txt and ./data/test.txt from ./data/all.txt using the
# default split (5% of the names go to test.txt).
divide_train_test()
# NOTE(review): training data is loaded from all.txt, i.e. every name,
# including those just written to test.txt -- confirm this is intended.
train_data = get_data("./data/all.txt")
# Batches are built once here; train() reuses the same batches every epoch.
batches = make_batch(train_data, batch_size = 4)

# Held-out data for evaluation (currently disabled):
# test_img_name_list = get_img_name_list("./data/test.txt")
# test_image_data = get_image_data(test_img_name_list)
# test_annotations_data = get_annotations_data(test_img_name_list)

In [19]:
# Faster R-CNN (ResNet-50 + FPN) sized for len(idx2label) classes, background
# included. Only the backbone is requested pretrained (pretrained_backbone=True);
# the detector as a whole is not (pretrained=False).
# NOTE(review): newer torchvision releases replace `pretrained` /
# `pretrained_backbone` with `weights` / `weights_backbone` -- confirm against
# the installed version.
model = fasterrcnn_resnet50_fpn(pretrained = False, pretrained_backbone = True, num_classes = len(idx2label))
# model = fasterrcnn_resnet50_fpn(pretrained = True)

In [20]:
# Adam over all model parameters, with weight decay.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-4)
# Lowers the learning rate when the value passed to scheduler.step() (the mean
# epoch loss, see train()) stops decreasing; patience=2 is the number of
# non-improving epochs tolerated. verbose=True produces the
# "reducing learning rate" lines in the training log.
scheduler = ReduceLROnPlateau(optimizer, mode = "min", patience = 2, verbose = True)

In [22]:
# Device-agnostic alternative (currently disabled):
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# Move the model to the GPU; train_one_epoch() moves every batch with .cuda()
# as well, so a CUDA device is required.
model.cuda()
# Number of passes over `batches` performed by train().
epochs = 200

In [None]:
# Start training; train() prints the epoch number, elapsed time and mean loss
# once per epoch and saves the best model to its checkpoint path.
train(model, epochs, batches, optimizer, scheduler)

epoch: 1
time cost: 527.0271625518799
losses: 0.024692895339336725
epoch: 2
time cost: 518.4622974395752
losses: 0.024786996607547136
epoch: 3
time cost: 518.3007614612579
losses: 0.024583294268128945
epoch: 4
time cost: 517.1050350666046
losses: 0.024562149716135333
epoch: 5
time cost: 517.0489892959595
losses: 0.024441821734446798
epoch: 6
time cost: 517.2553100585938
losses: 0.024583944907499992
epoch: 7
time cost: 516.312757730484
losses: 0.024697236533449983
epoch: 8
Epoch     8: reducing learning rate of group 0 to 1.0000e-07.
time cost: 517.194233417511
losses: 0.024693996057570944
epoch: 9
time cost: 517.1616199016571
losses: 0.024521582700572486
epoch: 10
time cost: 517.1106107234955
losses: 0.024563755211204994
epoch: 11
Epoch    11: reducing learning rate of group 0 to 1.0000e-08.
time cost: 516.9329569339752
losses: 0.02477929790557878
epoch: 12
time cost: 516.8449988365173
losses: 0.024742486365263917
epoch: 13
time cost: 516.6490831375122
losses: 0.024723902883827458
epoc

In [57]:
# Release cached GPU memory that PyTorch's allocator is holding but not using.
torch.cuda.empty_cache()