In [1]:
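# Walk the dataset files
# 17: 800 images
# 21: 1000 images
# 23: 3000 images

# Data-check step: abort with an error if any label row violates
# 0 <= column 1 < column 3 <= 1 or 0 <= column 2 < column 4 <= 1
# (box corners must be ordered and lie inside the unit square).
# NOTE(review): the original note wrote these inequalities with ">" throughout;
# the reading above is the presumed intent - confirm.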
import sys  
  
import math

import torch
import os
import cv2
import numpy as np
import torch.nn as nn
import datetime
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import warnings
import torch.utils.data as Data
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
version = "2"
# Dataset directories, relative to the current working directory.
# os.path.join inserts the platform's own separator, so the paths resolve on
# POSIX systems too instead of only on Windows (where the result is unchanged).
_dataset_root = os.path.join("..", "robot_simulation-" + version)
train_image_path = os.path.join(_dataset_root, "train", "images")
train_label_path = os.path.join(_dataset_root, "train", "labels")
valid_image_path = os.path.join(_dataset_root, "valid", "images")
valid_label_path = os.path.join(_dataset_root, "valid", "labels")

# Image file names of each split, listed once when this cell/module runs.
train_files = os.listdir(train_image_path)
valid_files = os.listdir(valid_image_path)

# Images are stacked into a single N*C*H*W tensor
def prepare_data(mode,batch_size):
    """Load one dataset split into memory and wrap it in a shuffling DataLoader.

    For every image of the split whose label file is non-empty, the image is
    read with OpenCV, min-max normalised to float32 in [0, 1] and permuted from
    H*W*C to C*H*W.  The label row is expected to hold one box: a leading value
    followed by (x_center, y_center, width, height); the box is converted to
    corner form (x1, y1, x2, y2).

    Args:
        mode: "train" or "valid"; selects the module-level file list and the
            image/label directories of that split.
        batch_size: batch size passed to the DataLoader.

    Returns:
        A tuple (loader, size): a DataLoader over a TensorDataset of
        (images, boxes) created with shuffle=True and drop_last=True, and the
        number of samples that were loaded.

    Raises:
        ValueError: if mode is not "train"/"valid", or a non-empty label file
            is not a single row with at least five values.
        OSError: if OpenCV cannot read an image file.
        RuntimeError: if no sample with a non-empty label was found.
    """
    if mode == "train":
        files, image_path, label_path = train_files, train_image_path, train_label_path
    elif mode == "valid":
        files, image_path, label_path = valid_files, valid_image_path, valid_label_path
    else:
        # Fail immediately instead of printing and crashing later on an
        # unbound local.
        raise ValueError(f"Invalid mode {mode!r}: expected 'train' or 'valid'")

    data = []
    labels = []
    for file in files:
        # The label file shares the image's base name.
        prefix = os.path.splitext(file)[0]
        label_file = os.path.join(label_path, prefix + ".txt")
        label = np.loadtxt(label_file)
        if label.size == 0:
            # Empty label file: no box for this image, skip it.
            continue
        if label.ndim != 1 or label.shape[0] < 5:
            raise ValueError(
                f"{label_file}: expected a single label row with at least 5 values, "
                f"got an array of shape {tuple(label.shape)}"
            )

        image_file = os.path.join(image_path, file)
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"Could not read image: {image_file}")
        image = cv2.normalize(image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
        # H*W*C -> C*H*W
        image = torch.from_numpy(np.array(image, dtype=np.float32)).permute(2, 0, 1)

        # Drop the leading value and convert centre/size to corner coordinates.
        x_c, y_c, w, h = (float(v) for v in label[1:5])
        box = torch.tensor(
            [
                x_c - w / 2,  # x1
                y_c - h / 2,  # y1
                x_c + w / 2,  # x2
                y_c + h / 2,  # y2
            ],
            dtype=torch.float32,
        )

        data.append(image)
        labels.append(box)

    if not data:
        raise RuntimeError(f"No labelled samples found for mode {mode!r} in {image_path}")

    # torch.stack requires every image to have the same shape.
    data_t = torch.stack(data, dim=0)
    labels_t = torch.stack(labels, dim=0)
    size = data_t.shape[0]
    dataset = Data.TensorDataset(data_t, labels_t)
    loader = Data.DataLoader(
        dataset=dataset,
        batch_size=batch_size,
        shuffle=True,
        # Incomplete final batches are dropped.
        drop_last=True,
    )
    return loader, size

# Batch sizes for the two loaders; training_loop also reads these module-level names.
train_batch_size = 32
valid_batch_size = 32
# Build both loaders when this cell/module runs; prepare_data also returns the
# number of samples it loaded for the split.
train_loader,train_size = prepare_data("train",train_batch_size)
valid_loader,valid_size = prepare_data("valid",valid_batch_size)



In [None]:

from IPython.display import clear_output
import math
import sys
import torch.nn.functional as F
import torchvision.ops.giou_loss as giou
import torchvision.ops.ciou_loss as ciou
from model_csp3 import Net
import pickle
import time

def initialize_model(model):
    """Re-initialise every Linear and Conv2d layer of `model` in place.

    Weights are drawn with Kaiming-uniform initialisation (fan_in mode,
    leaky_relu nonlinearity); biases, where a layer has one, are set to zero.
    All other module types are left untouched.  Returns None.
    """
    target_layers = (nn.Linear, nn.Conv2d)
    for layer in model.modules():
        if not isinstance(layer, target_layers):
            continue
        nn.init.kaiming_uniform_(layer.weight, mode='fan_in', nonlinearity='leaky_relu')
        if layer.bias is None:
            continue
        nn.init.zeros_(layer.bias)

def epoch_loss(outputs,train_labels):
    """Return the mean Complete-IoU loss between predicted and target boxes.

    Both arguments are forwarded unchanged to torchvision's
    complete_box_iou_loss with reduction="mean".
    NOTE(review): that function expects boxes in (x1, y1, x2, y2) form;
    confirm the model output follows the same layout as the labels.
    """
    # Alternative kept from the original author:
    # giou.generalized_box_iou_loss(outputs, train_labels, reduction="sum")
    return ciou.complete_box_iou_loss(outputs, train_labels, reduction="mean")
            
# Save the trained model and labels
def training_loop(model, n_epochs, optimizer, loss_fn,train_loder,valid_loader):
    """Train `model` for `n_epochs`, validating and re-plotting the loss curves each epoch.

    Relies on module-level names set up by the calling script: `device`
    (where each batch is moved) and the lists `loss_train_l` / `loss_val_l`,
    which receive one mean loss per epoch and are plotted after every epoch.

    Args:
        model: network being optimised; called as model(images).
        n_epochs: number of passes over `train_loder`.
        optimizer: optimizer stepped once per training batch.
        loss_fn: callable (outputs, labels) -> scalar loss tensor.
        train_loder: iterable of (images, labels) training batches (the
            parameter's spelling is kept because callers pass it by keyword).
        valid_loader: iterable of (images, labels) validation batches.

    Returns:
        None.
    """
    # Remember the caller's mode so the training passes run exactly as before.
    was_training = model.training

    for epoch in range(1, n_epochs + 1):
        since_s = time.time()

        # ---- training pass ----
        loss_train_sum = 0.0
        count_t = 0
        for imgs_t,labels_t in train_loder:
            imgs_t = imgs_t.to(device=device)
            labels_t = labels_t.to(device=device)
            outputs_t = model(imgs_t)
            loss_t = loss_fn(outputs_t,labels_t)
            optimizer.zero_grad()
            loss_t.backward()
            optimizer.step()
            loss_train_sum += loss_t.item()
            count_t += 1
        # Average over the batches actually seen rather than over a count
        # derived from unrelated module globals.
        loss_train = loss_train_sum / count_t if count_t else 0

        # ---- validation pass ----
        # eval() switches layers such as BatchNorm/Dropout to inference
        # behaviour; no_grad() avoids building an autograd graph that is
        # never back-propagated.
        loss_val_sum = 0.0
        count_v = 0
        model.eval()
        try:
            with torch.no_grad():
                for imgs_v,labels_v in valid_loader:
                    imgs_v = imgs_v.to(device=device)
                    labels_v = labels_v.to(device=device)
                    outputs_v = model(imgs_v)
                    loss_v = loss_fn(outputs_v,labels_v)
                    loss_val_sum += loss_v.item()
                    count_v += 1
        finally:
            model.train(was_training)
        loss_val = loss_val_sum / count_v if count_v else 0

        since_e = time.time()

        loss_val_l.append(loss_val)
        loss_train_l.append(loss_train)
        print("epoch:{}\tloss_train:{:.3f}\tloss_val:{:.3f}\ttime:{:.3f}s".format(epoch,loss_train,loss_val,since_e - since_s))

        # Redraw the full loss history.
        fig = plt.figure(dpi=100, figsize=(6, 4))
        plt.plot(range(1, epoch+1),loss_train_l,color="red",label="train")
        plt.plot(range(1, epoch+1),loss_val_l,color="blue",label="val")
        plt.xlabel("epoch")
        plt.ylabel("ciou loss")
        plt.legend(['train','val'])
        plt.title('train model')
        plt.grid(True)
        plt.show()
        # Release the figure so one is not left open per epoch.
        plt.close(fig)
        if epoch!=n_epochs:
            # Replace the previous epoch's output; the final epoch stays visible.
            clear_output(wait=True)
      
# Train the model
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    # Per-epoch loss history; training_loop appends to these module-level lists.
    loss_val_l = []
    loss_train_l = []
    x = []
    # Use the first CUDA device when one is available and fall back to the CPU
    # otherwise, instead of hard-coding device index 0.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # To train from scratch instead of resuming from the checkpoint:
    # model = Net().to(device)
    # initialize_model(model)
    # NOTE(security): torch.load unpickles the file and can execute arbitrary
    # code; only load checkpoints from a trusted source.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # rejects whole pickled modules - pass weights_only=False if this fails.
    # map_location puts the restored model on the same device as the batches.
    model = torch.load("model_csp3_1000.pth", map_location=device)
    # Alternative optimizer: optim.SGD(model.parameters(),lr=1e-4)
    optimizer = optim.Adam(model.parameters(),lr=1e-4, betas=(0.9, 0.999), eps=1e-08, weight_decay=0)
    loss_fn = epoch_loss
    n_epochs = 1000
    # Timing note kept from the original author: batch_size = 16 -> 57.25s

    try:
        start_time = time.time()
        print("start training!")
        training_loop(model=model, n_epochs=n_epochs, optimizer=optimizer, loss_fn=loss_fn,train_loder=train_loader,valid_loader=valid_loader)
        end_time = time.time()
        # Convert elapsed seconds to hours, minutes and seconds.
        elapsed_time = end_time - start_time
        hours, remainder = divmod(elapsed_time, 3600)
        minutes, seconds = divmod(remainder, 60)
        print(f"running : {int(hours)} h, {int(minutes)} m, {int(seconds)} s")
    finally:
        # Runs on success, error or interrupt alike, so progress is kept; note
        # that this overwrites the checkpoint that was loaded above.
        torch.save(model,"model_csp3_1000.pth")