In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from utils import EarlyStopping
from models import RegressionNet, RegressionDropout, RegressionWithBatchNorm, CustomModel1, CustomModel2
from generator import generate_data
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import os

# Timestamp string (YYYYMMDD-HHMMSS) captured once at notebook start; it is
# embedded in the log-directory and checkpoint names so each run is unique.
current_time = f"{datetime.now():%Y%m%d-%H%M%S}"

In [None]:
from torch.optim.lr_scheduler import CosineAnnealingLR

# --- Hyperparameters: adjust these before submitting to the server ---------
sample_nums = 100
num_epochs = 2
batch_size = 8
patience = 5

# --- Network construction ---------------------------------------------------
# Supply the size parameters when instantiating a model that takes them, then
# choose the loss function and optimizer below.
# (CustomModel1() is built without arguments here, so the three sizes are not
# consumed in this cell.)
input_size = 400
hidden_size = 16
output_size = 5
model = CustomModel1()

# Name of the training record. Intended format:
#   {model name}_{hyperparameters}_{time}
# Hyperparameters can differ between models; the name built here contains only
# the model name and the timestamp.
writer = SummaryWriter(
    log_dir=f'./logs/{model.name}_{current_time}',
)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# NOTE(review): T_max is fixed at 100 independently of num_epochs — confirm
# that this is the intended annealing period.
scheduler = CosineAnnealingLR(optimizer, T_max=100, eta_min=1e-5)

# Prefer the GPU when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

CustomModel1(
  (conv1): Conv1d(1, 32, kernel_size=(3,), stride=(1,), padding=(1,))
  (pool): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv1d(32, 64, kernel_size=(3,), stride=(1,), padding=(1,))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=6400, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=5, bias=True)
)

In [3]:
"""
除非对数据集划分比例有要求否则无需更改
"""
x_data, y_data = generate_data(sample_nums, normal_distribution= False)
input_data = y_data
output_data = x_data

X_train, X_temp, y_train, y_temp = train_test_split(input_data, output_data, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
"""
除非网络对输入有特殊要求非则无需改动
"""
early_stopping = EarlyStopping(patience=patience, verbose=True)
stopped_epoch = None
best_val_loss = float('inf')
best_model_state = None
print(f"training with {device}")
save_path = f"./weight/{model.name}"
os.makedirs(f"{save_path}", exist_ok=True)
for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for batch_x, batch_y in train_loader:

        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        batch_x = batch_x.unsqueeze(1)
        outputs = model(batch_x)
        loss = criterion(outputs, batch_y)
        train_loss += loss.item() * batch_x.size(0)
        loss.backward()

        optimizer.step()
        
    train_loss /= len(train_loader.dataset)
    
    scheduler.step()
    current_lr = scheduler.get_last_lr()[0]

    model.eval()
    val_loss = 0
    test_loss = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            batch_x = batch_x.unsqueeze(1)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            val_loss += loss.item() * batch_x.size(0)

        for batch_x, batch_y in test_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            
            batch_x = batch_x.unsqueeze(1)
            outputs = model(batch_x)
            loss = criterion(outputs, batch_y)
            test_loss += loss.item() * batch_x.size(0)

    val_loss /= len(val_loader.dataset)
    test_loss /= len(test_loader.dataset)
    print(f'Epoch {epoch+1}/{num_epochs}, Training Loss : {train_loss:.4f}, Validation Loss: {val_loss:.4f}')
    print(f'Test Loss:{test_loss}')
    writer.add_scalars('Loss', {'Train': train_loss, 
                            'Validation': val_loss, 
                            'Test': test_loss}, epoch)
    writer.add_scalar('Learning Rate', current_lr, epoch)
    
    if epoch > 30:
        early_stopping(val_loss, model)
        if early_stopping.early_stop:
            print(f"Early stopping at epoch:{epoch}")
            stopped_epoch = epoch
            break

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model_state = model.state_dict()
        best_model_epoch = epoch
        
writer.close()

model_save_name = f"{model.name}_{current_time}"


torch.save(best_model_state, f'{save_path}/{model_save_name}_{best_model_epoch}.pth')
if early_stopping.early_stop:
    torch.save(model.state_dict(), f'{save_path}/{model_save_name}_{stopped_epoch}.pth')
else:
    torch.save(model.state_dict(), f'{save_path}/{model_save_name}_{num_epochs}.pth')

training with cpu
Epoch 1/2, Training Loss : 1007.1515, Validation Loss: 567.1935
Test Loss:567.4401848347982
Epoch 2/2, Training Loss : 553.2293, Validation Loss: 508.2712
Test Loss:530.1266231262207
