In [2]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils import data # 获取迭代数据
from torch.utils.data import Dataset,TensorDataset,DataLoader,random_split
from torch.autograd import Variable # 获取变量

import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from torchensemble import VotingClassifier,FusionClassifier

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import datetime

In [3]:
#setting
batch_size = 16
learning_rate = 0.01
epochs = 10
clip = 0.01

## load data

In [4]:
all_data = np.load("../input_data/data.npy")
all_label = np.load("../input_data/label.npy")

In [5]:
all_data = torch.from_numpy(all_data)
all_label = torch.from_numpy(all_label)
all_data=all_data.float()
all_label=all_label.long()
#pack to dataset
dataset=TensorDataset(all_data,all_label)


In [11]:
#划分训练集和测试集

seed = 30
num_train_all = int(len(dataset) * 0.9)
num_test = len(dataset) - num_train_all
num_train = int(num_train_all * 0.8)
num_val = num_train_all - num_train

train_dataset, validate_dataset, test_dataset = random_split(dataset, [num_train, num_val,num_test],torch.Generator().manual_seed(seed))

In [12]:
#Load to DataLoader
print("train_dataset:",len(train_dataset))
print("validate_dataset:",len(validate_dataset))
print("test_dataset:",len(test_dataset))
print("batch_size:",batch_size)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size, shuffle=True)
validation_loader = torch.utils.data.DataLoader(dataset=validate_dataset,batch_size=batch_size, shuffle=True)


train_dataset: 11875
validate_dataset: 2969
test_dataset: 1650
batch_size: 16


## Model

In [13]:
no_cuda = True
use_cuda = not no_cuda and torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")

In [41]:
class EagleC_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        #卷积
        self.features_ = nn.Sequential(nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3,stride=1,padding=1)
                                        ,nn.ReLU(inplace=True)
                                       ,nn.MaxPool2d(2)
                                       
                                       ,nn.Conv2d(32,64,3,stride=1,padding=1)
                                        ,nn.ReLU(inplace=True)
                                       ,nn.MaxPool2d(2)
                                      )
        #分类
        #根据net输出的形状确定
        self.clf_ = nn.Sequential(nn.Dropout(0.5)
                                  ,nn.Linear(64*5*5,512)
                                  ,nn.ReLU(inplace=True)
                                 ,nn.Linear(512,6)
                                 ,nn.Sigmoid()
                                 )
    
    def forward(self,x):
        x = self.features_(x) #用特征提取的架构提取特征
        x = x.view(-1,64*5*5) #调整数据结构，拉平数据
        output = self.clf_(x)
        return output

In [24]:
net = nn.Sequential(nn.Conv2d(in_channels=1,out_channels=32,kernel_size=3,stride=1,padding=1)
                                        ,nn.ReLU(inplace=True)
                                       ,nn.MaxPool2d(2)
                                       
                                       ,nn.Conv2d(32,64,3,stride=1,padding=1)
                                        ,nn.ReLU(inplace=True)
                                       ,nn.MaxPool2d(2)
                                      )

In [36]:
data = torch.ones(1,1,21,21)

In [38]:
net(data).shape

torch.Size([1, 64, 5, 5])

In [42]:
model = VotingClassifier(
    estimator=EagleC_CNN,
    n_estimators=5,
    cuda=False,
)

# model = EagleC_CNN().to(device)

In [43]:

#多分类
criterion = nn.CrossEntropyLoss()
model.set_criterion(criterion)
#优化器
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
model.set_optimizer('Adam',             # parameter optimizer
                    lr=1e-3,            # learning rate of the optimizer
                    weight_decay=5e-4)  # weight decay of the optimizer

## Train

In [20]:
scheduler = ReduceLROnPlateau(optimizer, 'min')

In [44]:
model.fit(train_loader=train_loader,  # training data
          epochs=4) 

Estimator: 000 | Epoch: 000 | Batch: 000 | Loss: 1.79046 | Correct: 3/16
Estimator: 000 | Epoch: 000 | Batch: 100 | Loss: 1.40906 | Correct: 8/16
Estimator: 000 | Epoch: 000 | Batch: 200 | Loss: 1.52825 | Correct: 6/16
Estimator: 000 | Epoch: 000 | Batch: 300 | Loss: 1.47766 | Correct: 7/16
Estimator: 000 | Epoch: 000 | Batch: 400 | Loss: 1.54604 | Correct: 5/16
Estimator: 000 | Epoch: 000 | Batch: 500 | Loss: 1.40570 | Correct: 10/16
Estimator: 000 | Epoch: 000 | Batch: 600 | Loss: 1.53066 | Correct: 7/16
Estimator: 000 | Epoch: 000 | Batch: 700 | Loss: 1.43931 | Correct: 5/16
Estimator: 001 | Epoch: 000 | Batch: 000 | Loss: 1.79627 | Correct: 0/16
Estimator: 001 | Epoch: 000 | Batch: 100 | Loss: 1.57077 | Correct: 2/16
Estimator: 001 | Epoch: 000 | Batch: 200 | Loss: 1.56900 | Correct: 5/16
Estimator: 001 | Epoch: 000 | Batch: 300 | Loss: 1.46730 | Correct: 4/16
Estimator: 001 | Epoch: 000 | Batch: 400 | Loss: 1.47705 | Correct: 5/16
Estimator: 001 | Epoch: 000 | Batch: 500 | Loss: 1

NameError: name 'test_loader' is not defined

In [29]:
def train(model, device, train_loader, optimizer, clip,criterion):
	model.train()
	#损失
	loss_sum = 0.0
	for i, (data, target) in enumerate(train_loader):
		if i == (len(train_loader) - 1):
			continue
		data, target = Variable(data).to(device), Variable(target, requires_grad=False).to(device)
		#将模型的参数梯度初始化为0
		optimizer.zero_grad()
		#得到结果
		output = model(data)
		# loss function
		loss = criterion(output, target)
		loss.backward()
		#获取当前lr
		lr_current = get_lr(optimizer)
		clip2 = clip/lr_current
		#梯度裁剪
		nn.utils.clip_grad_norm_(model.parameters(),clip2)
		optimizer.step() #更新参数
		loss_sum = loss_sum + loss.item()
	#平均的损失函数
	return loss_sum/i
	
# validation
def validate(model, device, validation_loader,criterion):
	model.eval()
	loss_sum = 0.0
	with torch.no_grad():
		for i, (data, target) in enumerate(validation_loader):
			data, target = Variable(data).to(device), Variable(target, requires_grad=False).to(device)
			output = model(data)
			# loss function
			loss = criterion(output, target)
			#计算损失函数
			loss_sum = loss_sum + loss.item()
	#平均的损失函数
	return loss_sum/i

# get current learning rate
def get_lr(optimizer):
	for param_group in optimizer.param_groups:
		return param_group['lr']

In [32]:
#训练
for epoch in range(1, epochs):
    loss_train = train(model, device, train_loader, optimizer,clip,criterion) 
    loss_validate = validate(model, device, validation_loader,criterion)
    scheduler.step(loss_validate)	
    lr_current = get_lr(optimizer)
    print("epoch:{},lr:{},loss train:{},loss validate:{}".format(epoch, lr_current, np.round(loss_train,3), np.round(loss_validate,3)) ) 
    # save the model
    # torch.save(model.state_dict(), "model_epoch" + str(epoch))

NameError: name 'optimizer' is not defined

In [23]:
#save model
PATH = 'model.pth'
torch.save(model.state_dict(), PATH)

## Predict

In [46]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size, shuffle=False)

In [47]:
accuracy = model.predict(test_loader)

ValueError: The type of input X should be one of {{torch.Tensor, np.ndarray}}.

In [27]:
net = EagleC_CNN()
net.load_state_dict(torch.load(PATH))

<All keys matched successfully>

In [29]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        # 人为最大的就是分到的类
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on test: {100 * correct // total} %')
#还是比随机结果要好一些的

Accuracy of the network on test: 35 %
