In [16]:
from torch import empty
import math
import torch

In [17]:
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x119954400>

In [18]:
def generate_data(n=1000):
    inputs = empty(n, 2)
    inputs = empty(n, 2).uniform_(0,1)
    labels = (inputs - 0.5).pow(2).sum(1)<1/(2*math.pi)
    return inputs, labels

In [19]:
def split_dataset(inputs, lables, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2):
    train_len = math.floor(inputs.size()[0] * train_ratio)
    val_len = math.floor(inputs.size()[0] * val_ratio)
    test_len = inputs.size()[0]-train_len-val_len
    
    train_inputs = inputs.narrow(0, 0, train_len)
    train_labels = labels.narrow(0, 0, train_len)
    
    validation_inputs = inputs.narrow(0, train_len, val_len)
    validation_labels = labels.narrow(0, train_len, val_len)
    
    test_inputs = inputs.narrow(0, train_len+val_len, test_len)
    test_labels = labels.narrow(0, train_len+val_len, test_len)
    
    return train_inputs, train_labels, validation_inputs, validation_labels, test_inputs, test_labels

In [20]:
def convert_labels(labels):
    result = empty(labels.size(0), 2)
    for i in range(labels.size(0)):
        if labels[i] == 0:
            result[i, 0].fill_(1)
            result[i, 1].fill_(-1)
        else:
            result[i, 0].fill_(-1)
            result[i, 1].fill_(1)
    return result

In [21]:
class Module (object) :
    """
    Base class for other neural network modules to inherit from
    """
    
    def __init__(self):
        self.author = 'me'
    
    def forward ( self , input ) :
        """ `forward` should get for input, and returns, a tensor or a tuple of tensors """
        raise NotImplementedError
        
    def backward ( self , gradwrtoutput ) :
        """
        `backward` should get as input a tensor or a tuple of tensors containing the gradient of the loss 
        with respect to the module’s output, accumulate the gradient wrt the parameters, and return a 
        tensor or a tuple of tensors containing the gradient of the loss wrt the module’s input.
        """
        raise NotImplementedError
        
    def param ( self ) :
        """ 
        `param` should return a list of pairs, each composed of a parameter tensor, and a gradient tensor 
        of same size. This list should be empty for parameterless modules (e.g. activation functions). 
        """
        return []

In [22]:
class Linear(Module):
    
    def __init__(self, input_dim, output_dim, std=1):
        super().__init__()
        self.weight = empty(output_dim, input_dim).normal_(0, std)
        self.bias = empty(output_dim).normal_(0, std)
        self.dw = empty(self.weight.size()).zero_()
        self.db = empty(self.bias.size()).zero_()
        self.x = 0
         
    def forward(self, input_):
        self.x = input_
        return self.weight.mv(self.x) + self.bias
    
    def backward(self, grdwrtoutput):
        self.dw.add_(grdwrtoutput.view(-1,1).mm(self.x.view(1,-1)))
        self.db.add_(grdwrtoutput)
        return self.weight.t().mv(grdwrtoutput)
    
    def param (self):
        return [(self.weight, self.dw), (self.bias, self.db)]

In [23]:
class ReLu(Module):
    
    def __init__(self):
        super().__init__()
        self.input_ = 0
        
    def forward(self, input_):
        self.input_ = input_
        result = self.input_ * (self.input_ > 0).float()
        return result
    
    def backward(self, grdwrtoutput):
        return grdwrtoutput * (self.input_ > 0).float()    

    def param (self):
        return [(None, None)]  

In [24]:
class Tanh(Module):
    
    def __init__(self):
        super().__init__()
        self.input_ = 0
        
    def forward(self, input_):
        self.input_ = input_
        tanh = 2/(1+(-2*input_).exp())-1
        return tanh
    
    def backward(self, grdwrtoutput):
        result = 4*((self.input_.exp() + (-self.input_).exp()).pow(-2)) * grdwrtoutput
        return result
    
    def param (self):
        return [(None, None)]

In [25]:
class Sequential(Module):
    
    def __init__(self, *args):
        super().__init__()
        self.modules = []
        args = list(args)[0]
        for module in args:
            self.modules.append(module)
    
    def forward(self, input_):
        result = input_
        for module in self.modules:
            result = module.forward(result)
        return result
    
    def backward(self, grdwrtoutput):
        modules_re = self.modules[::-1]
        result = grdwrtoutput
        for module in modules_re:
            result = module.backward(result)
        return result
    
    def param ( self ) :
        parameters = []
        for module in self.modules:
            parameters.append(module.param())
        return parameters

In [26]:
def loss(pred, label):
    return (pred - label.float()).pow(2).sum()

def dloss(pred,label):
    return 2*(pred - label.float())

In [27]:
class SGD():
    def __init__(self, param, lr):
        self.param = param
        self.lr = lr
    
    def doit(self):
        for p in self.param:
            for tup in p:
                val, grad = tup
                if (val is None or grad is None):
                    continue
                val.add_(-self.lr * grad)
    def zero_grad(self):
        for p in self.param:
            for tup in p:
                val, grad = tup
                if (val is None or grad is None):
                    continue
                grad.zero_()

In [28]:
#做mini-batch stochastic gradient descent, sample还是一个一个地丢进model里面，
#但是一个batch梯度降一次，一个batch gradient清零一次 
def train_model(model, train_inputs, train_labels, validation_inputs, validation_labels, lr, epochs, batch_size = 1):
    """
    Trains the model, logs training- and validation error

    Output:
    model       :  Sequential object
    train error :  List object 
    validation error  :  List object 
    """   
    # make train targets and validation targets to 1-hot vector
    train_labels = convert_labels(train_labels)
    validation_labels = convert_labels(validation_labels)    
    
    
    # define optimizer
    sgd = SGD(model.param(), lr=lr)
    
    # constants
    nb_train_samples = train_inputs.size(0)
    nb_classes = train_labels.size(1)
    input_dim = train_inputs.size(1)
    nb_batches = int(nb_train_samples/batch_size)
    assert nb_train_samples%batch_size == 0#要求nb_train_samples必须被batch size整除
    
    # training in epochs
    train_error_list = []
    validation_error_list = []

    #做mini-batch stochastic gradient descent, sample还是一个一个地丢进model里面，
#但是一个batch梯度降一次，一个batch gradient清零一次 
    for epoch in range(epochs):

            # Training -------------------------------------------------------------------------------
            acc_loss = 0
            nb_train_errors = 0
            for i in range(nb_batches):
                dl_loss = 0
                sgd.zero_grad()
                # iterate through samples and accumelate derivatives
                for n in range(i*batch_size, (i+1)*batch_size):#对每一个epoch我们用的是SGD

                    ### In order to get nb_train_errors, check how many correctly classified

                    # Get index of correct one, by taking argmax
                    a_train_lables = train_labels[n]#取出了一个点的labels
                    train_labels_list = [a_train_lables[0], a_train_lables[1]]
                    correct = train_labels_list.index(max(train_labels_list))#这里是true classes


                    output = model.forward(train_inputs[n])

                    # Get index of the predicted of the two outputs, by taking argmax
                    output_list = [output[0], output[1]]

                    prediction = output_list.index(max(output_list))#哪个值大，就predict sample作为哪个class

                    # Check if predicted correctly
                    if int(correct) != int(prediction) : nb_train_errors += 1


                    ### Calculate loss 
                    acc_loss = acc_loss + loss(output, train_labels[n].float())#这个loss用于输出
                    dl_dloss = dl_loss+dloss(output, train_labels[n].float())#这个loss用于梯度降     
                model.backward(dl_dloss)
                sgd.doit()
            train_error_list.append((100 * nb_train_errors) / train_inputs.size(0))#对于每一个epoch都要求出一个train error和validation error


            # validationing --------------------------------------------------------------------------------
            nb_validation_errors = 0
        
            #对于每一个epoch求出测试集的loss，这里应该不用改
            for n in range(0, validation_inputs.size(0)):


                ### In order to get nb_train_errors, check how many correctly classified

                a_validation_target = validation_labels[n]
                validation_labels_list = [a_validation_target[0], a_validation_target[1]]
                correct = validation_labels_list.index(max(validation_labels_list)) # argmax

                ### Find which one is predicted of the two outputs, by taking argmax            

                output = model.forward(validation_inputs[n])#model只能丢进去一个一个的sample
                output_list = [output[0], output[1]]
                prediction = output_list.index(max(output_list))
                if int(correct) != int(prediction) : nb_validation_errors += 1


            if epoch%(epochs/20) == 0:
                print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% validation_error {:.02f}%'
                  .format(epoch,
                          acc_loss,
                          (100 * nb_train_errors) / train_inputs.size(0),
                          (100 * nb_validation_errors) / validation_inputs.size(0)))#区别在于测试集没有参与梯度降
            validation_error_list.append((100 * nb_validation_errors) / validation_inputs.size(0))

    return model, train_error_list, validation_error_list


In [54]:
inputs, labels = generate_data(n=1000)
train_inputs, train_labels, validation_inputs, validation_labels, test_inputs, test_labels = \
split_dataset(inputs, labels, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2) 


In [56]:
### Normalize data
mu, std = inputs.mean(), inputs.std()
train_inputs.sub_(mu).div_(std)
validation_inputs.sub_(mu).div_(std)
test_inputs.sub_(mu).div_(std)


### Create model
input_dim = 2
hidden_width = 25
output_dim = 2

model = Sequential([Linear(input_dim, hidden_width), ReLu(), Linear(hidden_width, hidden_width), ReLu(), Linear(hidden_width, hidden_width), ReLu(), Linear(hidden_width, output_dim), Tanh()])

### Train model and log training and validation error
model, train_error_list, test_error_list = train_model(model, train_inputs, train_labels, validation_inputs, validation_labels, 
                                                       lr = 0.0001, epochs=900, batch_size=5)

### Print final training error
print('train_error {:.02f}%'.format(train_error_list[-1]))


0 acc_train_loss 3154.48 acc_train_error 58.00% validation_error 60.00%
45 acc_train_loss 2840.02 acc_train_error 51.57% validation_error 50.00%
90 acc_train_loss 2804.35 acc_train_error 50.29% validation_error 48.00%
135 acc_train_loss 2632.09 acc_train_error 45.57% validation_error 46.00%
180 acc_train_loss 2430.08 acc_train_error 40.14% validation_error 36.00%
225 acc_train_loss 2254.38 acc_train_error 45.71% validation_error 40.00%
270 acc_train_loss 2288.80 acc_train_error 43.00% validation_error 37.00%
315 acc_train_loss 2263.12 acc_train_error 43.00% validation_error 36.00%
360 acc_train_loss 2243.33 acc_train_error 43.57% validation_error 37.00%
405 acc_train_loss 2242.34 acc_train_error 43.29% validation_error 37.00%
450 acc_train_loss 2241.88 acc_train_error 43.29% validation_error 37.00%
495 acc_train_loss 2241.60 acc_train_error 43.14% validation_error 37.00%
540 acc_train_loss 2241.34 acc_train_error 43.00% validation_error 37.00%
585 acc_train_loss 2241.09 acc_train_error

In [32]:
def test_model(model, test_inputs, test_targets):
    """
    Test the model and prints the test error
    """   
    
    # make test targets to 1-hot vector
    test_targets = convert_to_one_hot_labels(test_inputs, test_targets)    
    
    test_error_list = []
    
    nb_test_errors = 0

    for n in range(0, test_inputs.size(0)):


        ### In order to get nb_train_errors, check how many correctly classified
        a_test_target = test_targets[n]
        test_targets_list = [a_test_target[0], a_test_target[1]]
        correct = test_targets_list.index(max(test_targets_list)) # argmax

        ### Find which one is predicted of the two outputs, by taking argmax            
        output = model.forward(test_inputs[n])#model只能允许sample一个一个进去嘛
        output_list = [output[0], output[1]]
        prediction = output_list.index(max(output_list))
        if int(correct) != int(prediction) : nb_test_errors += 1


    print('test_error {:.02f}%'.format(((100 * nb_test_errors) / test_inputs.size(0))))
    test_error_list.append((100 * nb_test_errors) / test_inputs.size(0))
    return