# DL template
我想了很久到底用什么形式的文件来写这份内容，因为我觉得它不论是对未来师弟师妹的传承，还是对自己复习代码，都会有很大的帮助。正如标题所示，我准备把前期学习的 PyTorch 做一个总结，针对不同的模块规定一套代码范式，这样后面不论是更改模型，还是复现代码，都会更加方便。
之所以选择使用这个模式的文档，是因为更希望代码和解释能更清楚。我首先会分成几个部分写，最后汇总成一个。
以下是整理代码时参考的链接（此外还参考了在比赛中见过、用过的代码）：
https://github.com/victoresque/pytorch-template
https://github.com/bubbliiiing/yolov7-pytorch
在写的过程中，我发现很多人都使用 yaml、json 等配置文件来管理参数，但我个人感觉，这种方式不如把参数直接写在代码里看着舒服。所以我准备了这个模板。

## 需要的库

In [None]:
import torch
import numpy as np
import torch.nn as nn
from torch.utils.data import DataLoader
import tqdm
import albumentations as A
import cv2
import time
import os

## 随机种子

In [None]:
# Seed the NumPy and PyTorch random number generators with one shared value.
SEED = 123
np.random.seed(SEED)
torch.manual_seed(SEED)
# Disable the cuDNN autotuner and request deterministic cuDNN algorithms.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

## 加载模型

In [None]:
class Model(nn.Module):
    """Network skeleton: declare layers in ``__init__`` and compute in ``forward``."""

    def __init__(self):  # add the constructor arguments of the network here
        super().__init__()

    def forward(self, x):
        # Placeholder body: nothing is computed until a real network is written.
        return None

In [None]:
# Fall back to the CPU so this cell also runs on machines without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model_path = ''  # path to a pretrained checkpoint; leave empty to start from scratch
model = Model()
### model initialisation ###
model_dict      = model.state_dict()
pretrained_dict = {}
if model_path:
    # torch.load('') raises, so the checkpoint is only read when a path is given.
    # NOTE(review): assumes the file stores a plain state_dict (name -> tensor) — confirm.
    pretrained_dict = torch.load(model_path, map_location=device)
### select which pretrained weights to apply ###
# Copy only the entries whose name and shape match the current model, so a
# checkpoint with a different head (or extra keys) can still be used partially.
matched_dict = {
    key: value
    for key, value in pretrained_dict.items()
    if key in model_dict and np.shape(model_dict[key]) == np.shape(value)
}
model_dict.update(matched_dict)
model.load_state_dict(model_dict)
model.to(device)

## 数据增强

In [None]:
def build_transforms(img_size):
    """Build the albumentations pipelines for training and evaluation.

    Args:
        img_size: Target size, either a single int (square output) or a
            ``(height, width)`` pair.

    Returns:
        dict with two ``A.Compose`` pipelines: ``"train"`` (resize followed by
        random flips, rotations, distortions and colour changes) and
        ``"valid_test"`` (resize only).
    """
    # A.Resize expects height and width as two separate arguments, so the
    # size must be unpacked instead of being passed as one list.
    if isinstance(img_size, int):
        height = width = img_size
    else:
        height, width = img_size

    data_transforms = {
        "train": A.Compose([
            A.Resize(height, width, interpolation=cv2.INTER_NEAREST),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=15, p=0.9, border_mode=cv2.BORDER_REFLECT),
            # At most one geometric distortion is applied per sample.
            A.OneOf([
                A.OpticalDistortion(p=0.3),
                A.GridDistortion(p=.1),
            ], p=0.3),
            # At most one colour/contrast change is applied per sample.
            A.OneOf([
                A.HueSaturationValue(10, 15, 10),
                A.CLAHE(clip_limit=2),
                A.RandomBrightnessContrast(),
            ], p=0.3),
        ], p=1.0),
        "valid_test": A.Compose([
            A.Resize(height, width, interpolation=cv2.INTER_NEAREST),
        ], p=1.0),
    }
    return data_transforms

## 数据加载

In [None]:
class model_Dataset(torch.utils.data.Dataset):
    """Dataset skeleton to be filled in with project-specific loading code.

    It derives from ``torch.utils.data.Dataset`` (not ``nn.Module``) because it
    is handed to ``DataLoader``.

    Args:
        transforms: Optional augmentation pipeline to apply in
            ``__getitem__``; stored on ``self.transforms``.
    """

    def __init__(self, transforms=None):  # add further inputs here
        super().__init__()
        ### initialise data reading here --> collect the file locations ###
        ### initialise data augmentation and similar helpers here ###
        self.transforms = transforms

    def __len__(self):
        """Return the number of samples; must be implemented per project."""
        raise NotImplementedError("model_Dataset.__len__ has not been implemented yet")

    def __getitem__(self, index):
        """Return the sample at ``index``; must be implemented per project."""
        ### fetch the data that belongs to this index ###
        raise NotImplementedError("model_Dataset.__getitem__ has not been implemented yet")

In [None]:
def build_dataset_dataloader(data_transforms, train_bs=1, valid_bs=1, num_workers=16):
    """Create the training and validation dataloaders.

    Args:
        data_transforms: dict with a ``"train"`` and a ``"valid_test"``
            pipeline (the keys produced by ``build_transforms``).
        train_bs: Batch size of the training loader.
        valid_bs: Batch size of the validation loader.
        num_workers: Worker processes used by each loader.

    Returns:
        Tuple ``(train_dataloader, val_dataloader)``.
    """
    # Each split gets its own pipeline so validation data is never augmented.
    train_dataset = model_Dataset(data_transforms["train"])
    val_dataset = model_Dataset(data_transforms["valid_test"])
    train_dataloader = DataLoader(train_dataset, batch_size=train_bs, shuffle=True, num_workers=num_workers)
    # Validation order does not need to be random; keep it fixed for reproducible runs.
    val_dataloader = DataLoader(val_dataset, batch_size=valid_bs, shuffle=False, num_workers=num_workers)
    return train_dataloader, val_dataloader

## 优化器学习率搭建

In [None]:
init_lr = 5e-5
weight_decay = 5e-4
momentum = 0.937
# AdamW accepts no ``momentum`` keyword (passing it raises TypeError); the
# equivalent knob is the first-moment decay rate, i.e. the first beta.
optimizer = torch.optim.AdamW(model.parameters(), lr=init_lr, betas=(momentum, 0.999), weight_decay=weight_decay)
# Multiply the learning rate by ``gamma`` after every ``step_size`` scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1, last_epoch=-1)
### more learning-rate schedules are available in torch.optim.lr_scheduler ###
### a hand-written schedule can be used instead as well ###

## 搭建Loss and metric

In [None]:
class ModelLoss(nn.Module):
    """Loss skeleton: build the loss terms in ``__init__``, combine them in ``forward``.

    ``forward`` is overridden instead of ``__call__`` so that calling the
    instance still goes through ``nn.Module``'s own call handling.
    """

    def __init__(self):  # add the constructor inputs here
        super().__init__()
        # instantiate the individual loss functions here

    def forward(self, y_preds, labels):
        """Return the loss for ``y_preds`` against ``labels``.

        Raises:
            NotImplementedError: Always, until a project-specific loss is
                written here.
        """
        raise NotImplementedError("ModelLoss.forward has not been implemented yet")
class ModelMetric(nn.Module):
    """Metric skeleton: build the metric in ``__init__``, evaluate it in ``forward``.

    ``forward`` is overridden instead of ``__call__`` so that calling the
    instance still goes through ``nn.Module``'s own call handling.
    """

    def __init__(self):  # add the constructor inputs here
        super().__init__()
        # instantiate the individual metrics here

    def forward(self, y_preds, labels):
        """Return the metric value for ``y_preds`` against ``labels``.

        Raises:
            NotImplementedError: Always, until a project-specific metric is
                written here.
        """
        raise NotImplementedError("ModelMetric.forward has not been implemented yet")

In [None]:
# Create the training criterion and the validation metric once, up front.
model_loss, model_metric = ModelLoss(), ModelMetric()

## train_one_epoch

In [None]:
# Switch for half-precision (mixed precision) training.
fp16 = False
# The gradient scaler only exists when mixed precision is enabled.
scaler = None
if fp16:
    from torch.cuda.amp import GradScaler
    scaler = GradScaler()

In [None]:
def train_one_epoch(model, train_loader, optimizer, lr_scheduler,device,losses_dict):
    """Train ``model`` for one pass over ``train_loader``.

    Reads the module-level ``fp16`` flag and ``scaler`` to decide whether the
    mixed-precision path is used.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        optimizer: Optimizer updated once per batch.
        lr_scheduler: Scheduler stepped once at the end of the epoch.
        device: Device the batches are moved to.
        losses_dict: Callable ``(predictions, labels) -> loss``.

    Returns:
        Mean loss over the batches of this epoch (0.0 for an empty loader).
    """
    if fp16:
        # Imported once per call instead of once per batch.
        from torch.cuda.amp import autocast

    model.train()
    all_loss = 0.0
    num_batches = 0
    # ``tqdm`` is imported as a module in this file, so the progress-bar class
    # has to be reached through ``tqdm.tqdm``.
    pbar = tqdm.tqdm(enumerate(train_loader), total=len(train_loader), desc='Train ')
    for _, (images, labels) in pbar:
        optimizer.zero_grad()
        images = images.to(device, dtype=torch.float)
        labels = labels.to(device, dtype=torch.float)
        if not fp16:  # full-precision path
            y_preds = model(images)
            loss = losses_dict(y_preds, labels)
            loss.backward()
            optimizer.step()
        else:
            with autocast():
                outputs = model(images)
                loss = losses_dict(outputs, labels)

            scaler.scale(loss).backward()
            # The scaler steps the optimizer only; it must never be handed
            # the learning-rate scheduler.
            scaler.step(optimizer)
            scaler.update()
        all_loss += loss.item()
        num_batches += 1

    # StepLR counts epochs, so it is advanced once per epoch rather than once
    # per batch (which would decay the rate every ``step_size`` batches).
    if num_batches:
        lr_scheduler.step()
    ### record the epoch with a logger here ###
    return all_loss / max(num_batches, 1)

## val_one_epoch

In [None]:
def val_one_epoch(model, val_loader,device,metric):
    """Evaluate ``model`` on ``val_loader`` and return the mean metric value.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        val_loader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.
        metric: Callable ``(predictions, labels) -> score``.

    Returns:
        The metric averaged over all batches (0 for an empty loader).
    """
    model.eval()
    all_acc = 0
    num_batches = 0
    # ``tqdm`` is imported as a module in this file, so the progress-bar class
    # has to be reached through ``tqdm.tqdm``.
    pbar = tqdm.tqdm(enumerate(val_loader), total=len(val_loader), desc='Val ')
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for _, (images, labels) in pbar:
            images = images.to(device, dtype=torch.float)
            labels = labels.to(device, dtype=torch.float)
            y_preds = model(images)
            # Accumulate inside the loop so every batch contributes, not
            # just the last one.
            all_acc += metric(y_preds, labels)
            num_batches += 1
    ### record the epoch with a logger here ###
    return all_acc / max(num_batches, 1)

## main

In [None]:
 

if __name__ == '__main__':
    # Several settings below (seed, num_worker, the data paths, batch sizes,
    # lr, wd, lr_drop, thr) are template placeholders that this script does
    # not read yet.

    # step1: environment
    seed = 42
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    num_worker = 16
    data_path = ""
    third_data_path = ""
    ckpt_path = ""
    # step2: data
    img_size = [512, 512]
    train_bs = 8
    valid_bs = train_bs * 2
    # step3: model
    # step4: optimizer
    epoch = 50
    lr = 1e-4
    wd = 1e-5
    lr_drop = 30
    # step5: infer
    thr = 0.3

    train_val_flag = True
    if train_val_flag:
        data_transforms = build_transforms(img_size)
        train_loader, valid_loader = build_dataset_dataloader(data_transforms)  # dataset & dataloader
        model.to(device)

        best_val_acc = 0
        best_epoch = 0
        for epoch_now in range(1, epoch + 1):
            start_time = time.time()
            train_one_epoch(model, train_loader, optimizer, lr_scheduler, device, model_loss)
            val_acc = val_one_epoch(model, valid_loader, device, model_metric)
            ##### >>>>>>> step4: save best model <<<<<<
            is_best = (val_acc > best_val_acc)
            best_val_acc = max(best_val_acc, val_acc)
            if is_best:
                best_epoch = epoch_now
                # os.path.join keeps an empty ckpt_path relative to the working
                # directory instead of producing a path at the filesystem root.
                os.makedirs(ckpt_path or ".", exist_ok=True)
                # Name the file after the current epoch, not the total epoch count.
                save_path = os.path.join(ckpt_path, f"epoch{epoch_now}_dice{best_val_acc:.4f}.pth")
                torch.save(model.state_dict(), save_path)

            epoch_time = time.time() - start_time
            print("epoch:{}, time:{:.2f}s, best:{:.2f}\n".format(epoch_now, epoch_time, best_val_acc), flush=True)
        

### 小记
如果分块来写，最主要的就是上面这几个部分，但这其实还远远不够，因为很多细节都没有补充，所以下面我会增加一些细节的实现模块。同时，这只是一个类似 README 的简单文件，我准备在这个文件夹下再建立一个模板项目。