In [263]:
from torch import optim
from torch.utils.data import Dataset,DataLoader
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [264]:
# 自定义数据集
class ConvidDataset(Dataset):
    def __init__(self,path,mode = "train",transform = None):
        self.mode = mode
        data = pd.read_csv(path)
        data = np.array(data[1:])[:,1:].astype(np.float32)
        self.transform = transform
        features = list(range(93))
        if mode == "test":
            data = data[:,features]
            self.data = torch.from_numpy(data).float()
        else:
            target = data[:,-1]
            data = data[:,features]
            if mode == 'train':
                indices = [i for i in range(len(data)) if i % 10 != 0]
            elif mode == 'dev':
                indices = [i for i in range(len(data)) if i % 10 == 0]
            self.data = torch.from_numpy(data[indices]).float()
            self.target = torch.from_numpy(target[indices]).float()
        # 归一化
        self.data[:, 40:] = \
            (self.data[:, 40:] - self.data[:, 40:].mean(dim=0, keepdim=True)) \
            / self.data[:, 40:].std(dim=0, keepdim=True)
        self.dim = self.data.shape[1]
        print(f"读入{mode}数据,数据长度为{len(self.data)},数据维度为{self.dim}")
    def __len__(self):
        return len(self.data)
    def __getitem__(self, index):
        if self.mode == 'train':
            return self.data[index], self.target[index]
        else:
            return self.data[index]

In [265]:
convid_dataset = ConvidDataset('covid.train.csv')
convid_dataLoader = DataLoader(convid_dataset,shuffle=True,batch_size=270)

读入train数据,数据长度为2429,数据维度为93


In [266]:
# 定义神经网络
class CovidNetWork(nn.Module):
    def __init__(self,input_dim):
        super(CovidNetWork,self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim,64),
            nn.ReLU(),
            nn.Linear(64,1),
        )
        self.criterion = nn.MSELoss(reduction='mean')
    def forward(self,x):
        return self.net(x).squeeze(1)
    def calculate_loss(self,pred, target):
        return self.criterion(pred, target)


In [267]:
def dev(dv_set, model, device):
    model.eval()                                # set model to evalutation mode
    total_loss = 0
    for x, y in dv_set:                         # iterate through the dataloader
        x, y = x.to(device), y.to(device)       # move data to device (cpu/cuda)
        with torch.no_grad():                   # disable gradient calculation
            pred = model(x)                     # forward pass (compute output)
            mse_loss = model.cal_loss(pred, y)  # compute loss
        total_loss += mse_loss.detach().cpu().item() * len(x)  # accumulate loss
    total_loss = total_loss / len(dv_set.dataset)              # compute averaged loss

    return total_loss

In [268]:
dev_dataset = ConvidDataset('covid.test.csv',"dev")
dev_dataLoader = DataLoader(dev_dataset,shuffle=True,batch_size=270)

读入dev数据,数据长度为90,数据维度为93


In [269]:
# 训练数据
model = CovidNetWork(convid_dataset.dim)
print(convid_dataset.dim)
# 优化器
# optimizer = optim.Adam(model.parameters(),lr=0.001)
optimizer = getattr(torch.optim, "SGD")(
    model.parameters(), lr=0.01, momentum=0.9)
epochs = 1500
epoch = 0
min_mse = 1000
while epoch < epochs:
        model.train()                           # set model to training mode
        for x, y in convid_dataLoader:                     # iterate through the dataloader
            optimizer.zero_grad()               # set gradient to zero
            x, y = x.to(device), y.to(device)   # move data to device (cpu/cuda)
            pred = model(x)                     # forward pass (compute output)
            mse_loss = model.calculate_loss(pred, y)  # compute loss
            mse_loss.backward()                 # compute gradient (backpropagation)
            optimizer.step()                    # update model with optimizer

        # After each epoch, test your model on the validation (development) set.
        dev_mse = dev(dev_dataLoader, model, device)
        if dev_mse < min_mse:
            # Save model if your model improved
            min_mse = dev_mse
            print('Saving model (epoch = {:4d}, loss = {:.4f})'
                .format(epoch + 1, min_mse))
            early_stop_cnt = 0
        else:
            early_stop_cnt += 1

        epoch += 1
        if early_stop_cnt > 200:
            break
print('Finished training after {} epochs'.format(epoch))
# for epoch in range(epochs):
#     model.train()
#     optimizer.zero_grad()
#     pred = model(convid_dataset.data)
#     mse_loss = model.calculate_loss(pred, convid_dataset.target)
#     mse_loss.backward()
#     optimizer.step()
#     if epoch % 100 == 1:
#         print(f"epoch:{epoch},mse_loss:{mse_loss}")

93


ValueError: too many values to unpack (expected 2)

In [None]:
# 保存模型
torch.save(model.state_dict(),"covid.pth")

In [None]:
# 测试数据
convid_dataset = ConvidDataset('covid.test.csv',"test")
convid_dataLoader = DataLoader(convid_dataset,shuffle=True,batch_size=32)
model = CovidNetWork(convid_dataset.dim)
model.load_state_dict(torch.load("covid.pth"))
model.eval()
print(convid_dataset.dim)
# 优化器
model.train()
pred = model(convid_dataset.data)

print(pred[:10])