模型训练在 NVIDIA GPU 平台上进行；实际运行时使用对应的 .py 脚本，输出保存在 output.txt 文件中。

In [None]:
import numpy as np
import torch 
import torch.nn as nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn.functional as F

import os
import pandas as pd
import matplotlib.pyplot as plt
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

步骤一：数据处理

In [None]:
class SUSY_Dataset(Dataset):
    """In-memory train or test split of the SUSY CSV file.

    The file is read without a header row. The first column is named
    ``'SUSY'`` and used as the integer class label; the remaining 18 columns
    are used as float32 features.

    Args:
        train_size: Number of rows to keep in this split.
        train: If truthy, keep the first ``train_size`` rows that were read;
            otherwise keep the last ``train_size`` rows.
        path: CSV file to read (defaults to ``"SUSY"`` in the working
            directory).
        nrows: Maximum number of rows to read from the file.

    Raises:
        ValueError: If ``train_size`` is negative.
    """

    def __init__(self, train_size, train, path="SUSY", nrows=5000000):
        train_size = int(train_size)
        if train_size < 0:
            raise ValueError("train_size must be non-negative")

        self.features = ['SUSY', 'lepton 1 pT', 'lepton 1 eta', 'lepton 1 phi', 'lepton 2 pT', 'lepton 2 eta', 'lepton 2 phi',
                         'missing energy magnitude', 'missing energy phi', 'MET_rel', 'axial MET', 'M_R', 'M_TR_2', 'R', 'MT2',
                         'S_R', 'M_Delta_R', 'dPhi_r_b', 'cos(theta_r1)']
        # Number of datapoints to work with.
        # NOTE(review): engine='python' is kept from the original; the default
        # C parser is usually much faster - confirm whether it is required.
        self.df = pd.read_csv(path, header=None, nrows=nrows, engine='python')
        self.df.columns = self.features
        Y = self.df['SUSY']
        X = self.df[[col for col in self.df.columns if col != "SUSY"]]

        total = len(Y)
        print(f"total sample: {total}")

        # Select the training or the test rows.
        if train:
            X = X.iloc[:train_size]
            Y = Y.iloc[:train_size]
            print("Training on {} examples".format(train_size))
        else:
            # Take the tail relative to the number of rows actually read, so
            # the split is still correct when the file holds fewer than
            # ``nrows`` rows.
            start = max(total - train_size, 0)
            X = X.iloc[start:]
            Y = Y.iloc[start:]
            print("Testing on {} examples".format(train_size))

        # int64 labels regardless of platform: nn.CrossEntropyLoss needs
        # 64-bit class indices.
        self.data = (X.values.astype(np.float32), Y.values.astype(np.int64))
        print("Using both high and low level features")
        print(f"num_samples: {len(self.data[1])}")

    def __len__(self):
        """Return the number of samples in this split."""
        return len(self.data[1])

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.data[0][idx], self.data[1][idx]

步骤二：建立网络

In [None]:
class model(nn.Module):
    """Fully connected classifier mapping 18 input features to 2 class logits.

    The network is ``Linear(18, hidden_neuron)``, followed by
    ``hidden_layer_num - 1`` layers ``Linear(hidden_neuron, hidden_neuron)``,
    followed by ``Linear(hidden_neuron, 2)``. A ReLU is applied after every
    layer except the last; without a non-linearity the stacked linear layers
    would collapse into a single affine map and adding layers would have no
    effect.

    The output is raw (unnormalised) logits, which is what
    ``nn.CrossEntropyLoss`` expects as input. ``argmax`` over the logits gives
    the predicted class.

    Args:
        hidden_neuron: Width of every hidden layer.
        hidden_layer_num: Number of hidden layers; values below 1 are treated
            as 1.
    """

    def __init__(self, hidden_neuron, hidden_layer_num):
        super().__init__()
        self.hidden_neuron = hidden_neuron
        self.hidden_layer_num = hidden_layer_num

        # Layer widths from input to output: 18 -> hidden x k -> 2.
        widths = [18] + [hidden_neuron] * max(hidden_layer_num, 1) + [2]
        self.fc = [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])]
        # Register the layers so their parameters are visible to optimizers
        # and are moved by ``.to(device)``.
        self.layers = nn.ModuleList(self.fc)

    def forward(self, x):
        """Return class logits of shape ``(batch, 2)`` for ``x`` of shape ``(batch, 18)``."""
        *hidden_layers, output_layer = self.layers
        for layer in hidden_layers:
            x = F.relu(layer(x))
        return output_layer(x)

步骤三：定义损失函数与优化器

In [None]:
def _train_one_epoch(network, loader, optimizer, criterion, epoch):
    """Run one optimisation pass over ``loader``; return the last batch's loss.

    Returns ``nan`` when the loader yields no batches.
    """
    network.train()
    last_loss = float("nan")
    for batch_idx, (data, label) in enumerate(loader):
        data = data.to(device)
        label = label.to(device)

        # Must be called on the real optimizer: on an nn.DataParallel wrapper
        # this resolves to Module.zero_grad() over zero parameters, so the
        # gradients would silently accumulate across batches.
        optimizer.zero_grad()
        loss = criterion(network(data), label)
        loss.backward()
        optimizer.step()

        # Progress report every 5 batches.
        if batch_idx % 5 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

        last_loss = loss

    return last_loss.item() if torch.is_tensor(last_loss) else last_loss


def _evaluate_on_loader(network, loader, criterion):
    """Return ``(mean per-sample loss, accuracy)`` of ``network`` on ``loader``."""
    network.eval()
    loss_sum = 0.0
    correct = 0
    # No gradients are needed for evaluation; skipping the graph saves memory.
    with torch.no_grad():
        for data, label in loader:
            data = data.to(device)
            label = label.to(device)
            output = network(data)
            # ``criterion`` returns the batch mean, so weight it by the batch
            # size to make the final division a true per-sample average.
            loss_sum += criterion(output, label).item() * label.size(0)
            # The predicted class is the index of the largest output.
            pred = output.argmax(dim=1)
            correct += (pred == label).sum().item()

    num_samples = len(loader.dataset)
    test_loss = loss_sum / num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))

    return test_loss, correct / num_samples


def evaluate_model(epochs, hidden_neuron, hidden_layer_num, train_loader, test_loader, learning_rate):
    """Train a fresh ``model`` with SGD and return its final test accuracy.

    A new network is built, its ``nn.Linear`` weights are drawn from
    ``N(0, 0.1)`` and its biases set to zero, and it is trained with
    ``nn.CrossEntropyLoss`` for ``epochs`` passes over ``train_loader``. The
    test set is evaluated after every epoch.

    The network runs on the module-level ``device``. The previous
    ``nn.DataParallel`` wrappers are removed: they were bypassed through
    ``.module`` (so no data parallelism took place), their hard-coded
    ``device_ids=[0,1,2,3]`` fails on CUDA hosts with fewer than four GPUs,
    and wrapping the optimizer turned ``zero_grad()`` into a no-op.

    Args:
        epochs: Number of training epochs.
        hidden_neuron: Width of each hidden layer.
        hidden_layer_num: Number of hidden layers.
        train_loader: DataLoader yielding ``(features, label)`` training batches.
        test_loader: DataLoader yielding ``(features, label)`` test batches.
        learning_rate: SGD learning rate.

    Returns:
        Test accuracy (fraction of correct predictions) after the last epoch.
    """
    DNN = model(hidden_neuron, hidden_layer_num)
    # Initialise parameters.
    for layer in DNN.layers:
        if isinstance(layer, nn.Linear):
            nn.init.normal_(layer.weight, mean=0.0, std=0.1)
            nn.init.zeros_(layer.bias)
    DNN = DNN.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(DNN.parameters(), lr=learning_rate)

    train_loss = np.zeros(epochs)
    test_loss = np.zeros_like(train_loss)
    test_accuracy = np.zeros_like(train_loss)

    for epoch in range(1, epochs + 1):
        train_loss[epoch - 1] = _train_one_epoch(DNN, train_loader, optimizer, criterion, epoch)
        test_loss[epoch - 1], test_accuracy[epoch - 1] = _evaluate_on_loader(DNN, test_loader, criterion)

    return test_accuracy[-1]

步骤四：调用函数完成实验

使用单个隐藏层（含 1000 个神经元），研究预测正确率与训练样本数量的关系；训练样本数目范围为 1000 至 4500000，并画出关系图。

In [None]:
# Experiment 1: accuracy as a function of the number of training samples,
# using one hidden layer of 1000 neurons.
train_size = np.linspace(1000, 4500000, 10).astype(int)
correctness = []

# The same held-out split is reused for every training-set size.
testset = SUSY_Dataset(500000, False)
test_loader = DataLoader(testset, batch_size=64, shuffle=True)

for size in train_size:
    trainset = SUSY_Dataset(size, True)
    train_loader = DataLoader(trainset, batch_size=1000, shuffle=True)

    final_accuracy = evaluate_model(
        epochs=30,
        hidden_neuron=1000,
        hidden_layer_num=1,
        train_loader=train_loader,
        test_loader=test_loader,
        learning_rate=0.05,
    )
    correctness.append(final_accuracy)

In [None]:
# Plot the final test accuracy against the training-set size and save it.
plt.figure(figsize=(10,10), dpi=100)
plt.plot(train_size, correctness,'-bo')
# The x values are training-set sizes; the previous label "hiddenlayer_num"
# was copied from the hidden-layer experiment.
plt.xlabel("train_size")
plt.ylabel("correctness")
plt.savefig("pic1.png")
plt.show()

固定隐藏层神经元每层100个，研究正确率与隐藏层数的关系，层数范围1-5，画出关系图

In [None]:
# Experiment 2: accuracy as a function of the number of hidden layers (1-5),
# with 100 neurons per hidden layer.
hiddenlayer_num = range(1, 6)
correctness = []

# First 4,500,000 rows for training, last 500,000 rows for testing.
trainset = SUSY_Dataset(4500000, True)
testset = SUSY_Dataset(500000, False)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True)
test_loader = DataLoader(testset, batch_size=1000, shuffle=False)

for num in hiddenlayer_num:
    final_accuracy = evaluate_model(
        epochs=10,
        hidden_neuron=100,
        hidden_layer_num=num,
        train_loader=train_loader,
        test_loader=test_loader,
        learning_rate=0.05,
    )
    correctness.append(final_accuracy)

In [None]:
# Plot the final test accuracy against the number of hidden layers and save it.
fig, ax = plt.subplots(figsize=(5, 5), dpi=100)
ax.plot(hiddenlayer_num, correctness, '-bo')
ax.set_xlabel("hiddenlayer_num")
ax.set_ylabel("correctness")
fig.savefig("pic2.png")
plt.show()