In [21]:
import pandas as pd
import numpy as np
import os
import torch
from torch import nn
from skorch import NeuralNetRegressor
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt

output_nums=1


# 修改后的createXY函数
def createXY(dataset: pd.DataFrame, n_past: int, n_future: int, column_target: str):
    dataX, dataY = [], []
    for i in range(n_past, len(dataset) - n_future + 1):
        dataX.append(dataset.iloc[i - n_past:i].values)
        dataY.append(dataset.iloc[i:i + n_future][column_target].values)
    return np.array(dataX), np.array(dataY)

# 修改后的process_files函数
def process_files(columns_all, column_target, folder_path, n_past=1, n_future=1):
    all_dataX, all_dataY = np.array([]), np.array([])
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if os.path.isfile(file_path):
            series = pd.read_csv(file_path)
            single_dataset = series[columns_all]
            dataX, dataY = createXY(single_dataset, n_past, n_future, column_target[0])
            all_dataX = np.vstack([all_dataX, dataX]) if all_dataX.size else dataX
            all_dataY = np.vstack([all_dataY, dataY]) if all_dataY.size else dataY
    return all_dataX, all_dataY

columns_all = ['CGM (mg / dl)', 'CSII - basal insulin (Novolin R, IU / H)']
column_target = ['CGM (mg / dl)']
folder_path = './diabetes_datasets/T1'

# 使用n_past=8, n_future=4调用process_files
dataX, dataY = process_files(columns_all, column_target, folder_path, n_past=8, n_future=output_nums)



In [22]:
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch import nn, optim

# 假设dataX和dataY是你的数据
# 将它们转换为PyTorch张量，这里假设它们已经是Tensor或者从Numpy转换过来的
dataX_tensor = torch.tensor(dataX, dtype=torch.float32)
dataY_tensor = torch.tensor(dataY, dtype=torch.float32)

# 创建TensorDataset对象
dataset = TensorDataset(dataX_tensor, dataY_tensor)

# 其余拆分数据集、创建DataLoader对象的代码与之前相同
train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - (train_size + val_size)


# 随机拆分数据集
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

# 创建DataLoaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
feature_nums = dataX.shape[2]


In [23]:
# 修改后的LSTMRegressor类
class LSTMRegressor(nn.Module):
    def __init__(self, num_units=50, dropout=0.2, output_size=output_nums):
        super(LSTMRegressor, self).__init__()
        self.lstm1 = nn.LSTM(input_size=feature_nums, hidden_size=num_units, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=num_units, hidden_size=num_units, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.dense = nn.Linear(in_features=num_units, out_features=output_size)
    
    def forward(self, X):
        X, _ = self.lstm1(X)
        X, _ = self.lstm2(X)
        X = self.dropout(X)
        X = X[:, -1, :]  # Get the last sequence output
        X = self.dense(X)
        return X

In [24]:
model = LSTMRegressor()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 早停法参数
best_val_loss = float('inf')
patience = 10
patience_counter = 0
max_epochs = 100

# 训练过程
for epoch in range(max_epochs):
    model.train()
    
    # Training loop
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    
    # 每个epoch后，在验证集上评估模型
    model.eval()
    with torch.no_grad():
        val_loss = 0
        for data, target in val_loader:
            output = model(data)
            val_loss += criterion(output, target).item()
        
    val_loss /= len(val_loader)

    print(f'Epoch {epoch}: Training Loss: {loss.item()}, Validation Loss: {val_loss}')

    # 早停法逻辑
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # 保存最好的模型状态
        best_model_state = model.state_dict()
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f'Early stopping triggered after {epoch} epochs.')
        model.load_state_dict(best_model_state)
        break



Epoch 0: Training Loss: 30601.103515625, Validation Loss: 28515.62468112245
Epoch 1: Training Loss: 25802.462890625, Validation Loss: 26187.59797512755
Epoch 2: Training Loss: 26021.341796875, Validation Loss: 24121.552853954083
Epoch 3: Training Loss: 20677.99609375, Validation Loss: 22235.36224489796
Epoch 4: Training Loss: 18962.0234375, Validation Loss: 20508.449398118624
Epoch 5: Training Loss: 14620.5400390625, Validation Loss: 18912.69232302296
Epoch 6: Training Loss: 17788.330078125, Validation Loss: 17452.179587850766
Epoch 7: Training Loss: 19132.6015625, Validation Loss: 16112.062679368622
Epoch 8: Training Loss: 12856.798828125, Validation Loss: 14877.438277264031
Epoch 9: Training Loss: 12905.1806640625, Validation Loss: 13750.096859056122
Epoch 10: Training Loss: 15616.0087890625, Validation Loss: 12723.531269929847
Epoch 11: Training Loss: 11456.5771484375, Validation Loss: 11788.053691007653
Epoch 12: Training Loss: 11498.2080078125, Validation Loss: 10938.547403140943


In [25]:
# 测试过程
model.eval()
test_loss = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        test_loss += criterion(output, target).item()

test_loss /= len(test_loader)
print(f'Test Loss: {test_loss}')

Test Loss: 57.204752201936685
