In [1]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import data  # 获取迭代数据
from torch.utils.data import Dataset, TensorDataset, DataLoader, random_split
from torch.autograd import Variable  # 获取变量

import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchensemble import VotingClassifier, FusionClassifier

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import datetime
import time

In [2]:
# Settings: training hyper-parameters
batch_size = 16
epochs = 10
learning_rate = 0.01
clip = 0.01  # NOTE(review): presumably a clipping threshold; it is consumed outside this cell

# Settings: device selection
no_cuda = False  # flip to True to stay on the CPU even when CUDA is available
use_cuda = (not no_cuda) and torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")

In [3]:
# Load the post-imputation arrays (the file names suggest features and binary labels).
all_data, all_label = (
    np.load("../input_data/data_imputation.npy"),
    np.load("../input_data/label_imputation_binary.npy"),
)

In [4]:
# Wrap the arrays as a dataset.
# torch.as_tensor accepts NumPy arrays as well as tensors, so this cell can be
# re-run without restarting the kernel even though it rebinds `all_data` and
# `all_label` (torch.from_numpy raises TypeError when handed a tensor).
# The result matches the previous from_numpy(...).to(device).float()/.long():
# float32 features and int64 labels, both placed on `device`.
all_data = torch.as_tensor(all_data, dtype=torch.float32, device=device)
all_label = torch.as_tensor(all_label, dtype=torch.long, device=device)

# Pack to dataset: pair every sample with its label.
dataset = TensorDataset(all_data, all_label)


In [5]:
# Split the dataset into training and test subsets.

seed = 30
split_ratio = 0.8

num_train = int(len(dataset) * split_ratio)
num_test = len(dataset) - num_train

# A dedicated, seeded generator makes the split itself repeatable.
train_dataset, test_dataset = random_split(
    dataset,
    lengths=[num_train, num_test],
    generator=torch.Generator().manual_seed(seed),
)

# Load to DataLoader: only the training batches are shuffled.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print("train_dataset:", len(train_dataset))
print("test_dataset:", len(test_dataset))
print("batch_size:", batch_size)



train_dataset: 16444
test_dataset: 4112
batch_size: 16


## model

In [6]:
class EagleC_CNN(nn.Module):
    """Small CNN classifier: two conv stages followed by a dense head.

    Feature extractor (``features_``): two rounds of
    Conv2d(3x3, stride 1, padding 1) -> ReLU -> MaxPool2d(2), producing
    32 and then 64 channels.

    Classifier (``clf_``): Dropout(0.5) -> Linear(64*5*5, 512) -> ReLU ->
    Linear(512, num_classes) -> Sigmoid.

    The first Linear layer is sized for a 64 x 5 x 5 feature map. With two
    2x2 poolings (which floor odd sizes) that corresponds to inputs whose
    height and width are between 20 and 23.

    Args:
        in_channels: number of channels of the input (default 1, the value
            that used to be hard-coded).
        num_classes: length of the output vector per sample (default 6, the
            value that used to be hard-coded).

    NOTE(review): the head ends in Sigmoid, so outputs lie in [0, 1]. The
    loss function is not visible here; if it is ``nn.CrossEntropyLoss``
    (which applies log-softmax itself) this Sigmoid should probably be
    removed. Left unchanged to preserve behavior - confirm with the caller.
    """

    def __init__(self, in_channels=1, num_classes=6):
        super().__init__()
        # Convolutional feature extractor
        self.features_ = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )
        # Classification head; its input width is fixed by the shape the
        # feature extractor emits (64 channels x 5 x 5).
        self.clf_ = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(64 * 5 * 5, 512),
            nn.ReLU(inplace=True),
            nn.Linear(512, num_classes),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the (batch, num_classes) sigmoid outputs for a batch ``x``."""
        x = self.features_(x)  # extract features
        # Flatten per sample instead of view(-1, 64*5*5): the batch dimension
        # is preserved, so an input with the wrong spatial size fails loudly
        # in the Linear layer rather than being silently regrouped into a
        # different number of rows.
        x = x.flatten(start_dim=1)
        return self.clf_(x)

## train

In [7]:
def IterOnce(net,criterion,opt,x,y):
    '''
    Run one optimization step of the model on a single batch.

    net: instantiated model
    criterion: loss function, called as criterion(output, y)
    opt: optimizer holding net's parameters
    x: all samples of one batch
    y: true labels of all samples in the batch

    Returns (correct, loss):
    correct: 0-dim tensor counting samples whose arg-max output equals y
    loss: the tensor returned by criterion for this batch (not detached)
    '''
    # Call the module itself rather than net.forward(): Module.__call__ is
    # what runs any registered forward hooks.
    sigma = net(x)
    loss = criterion(sigma, y)
    loss.backward()
    opt.step()
    opt.zero_grad(set_to_none=True)  # release gradient tensors to save memory
    # Predicted label = index of the largest output along dim 1
    _, yhat = torch.max(sigma, dim=1)
    correct = torch.sum(yhat == y)
    return correct,loss

def TestOnce(net,criterion,x,y):
    '''
    Evaluate the model on one set of data without tracking gradients.

    net: trained model
    criterion: loss function, called as criterion(output, y)
    x: all samples to evaluate
    y: true labels of the samples to evaluate

    Returns (correct, loss):
    correct: 0-dim tensor counting samples whose arg-max output equals y
    loss: the tensor returned by criterion for this data

    The model is put in eval mode for the forward pass, so layers such as
    Dropout behave deterministically even if the caller forgot net.eval().
    The previous train/eval mode is restored before returning.
    '''
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            # Call the module itself rather than net.forward() so that
            # registered forward hooks run.
            sigma = net(x)
            loss = criterion(sigma, y)
            _, yhat = torch.max(sigma, dim=1)
            correct = torch.sum(yhat == y)
    finally:
        # Leave the model in whatever mode the caller had it in.
        net.train(was_training)
    return correct,loss



In [None]:
#提前停止
class EarlyStopping():
    ''' 
    在测试集上的损失连续几个epochs不再降低的时候，提前停止
    val_loss：测试集/验证集上这个epoch的损失
    '''

    def __init__(self,patience=5,tol=0.0005):
        ''' 
        patience:连续patience个epochs上损失不再降低的时候，停止迭代
        tol:阈值，当新损失与旧损失之间的差异小于tol时，认为模型不再提升
        '''