In [1]:
from torch import empty
import math
import torch
import numpy as np

In [2]:
torch.set_grad_enabled(False)

In [59]:
def generate_data(n=1000):
    inputs = empty(n, 2)
    torch.manual_seed(123)
    inputs = empty(n, 2).uniform_(0,1)
    labels = (inputs - 0.5).pow(2).sum(1)<1/(2*math.pi)
    return inputs, labels

In [4]:
def split_dataset(inputs, lables, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2):
    train_len = math.floor(inputs.size()[0] * train_ratio)
    val_len = math.floor(inputs.size()[0] * val_ratio)
    test_len = inputs.size()[0]-train_len-val_len
    
    train_inputs = inputs.narrow(0, 0, train_len)
    train_labels = labels.narrow(0, 0, train_len)
    
    validation_inputs = inputs.narrow(0, train_len, val_len)
    validation_labels = labels.narrow(0, train_len, val_len)
    
    test_inputs = inputs.narrow(0, train_len+val_len, test_len)
    test_labels = labels.narrow(0, train_len+val_len, test_len)
    
    return train_inputs, train_labels, validation_inputs, validation_labels, test_inputs, test_labels

In [5]:
def convert_labels(labels):
    result = empty(labels.size(0), 2)
    for i in range(labels.size(0)):
        if labels[i] == 0:
            result[i, 0].fill_(1)
            result[i, 1].fill_(-1)
        else:
            result[i, 0].fill_(-1)
            result[i, 1].fill_(1)
    return result

In [6]:
class Module (object) :
    """
    Base class for other neural network modules to inherit from
    """
    
    def __init__(self):
        self.author = 'me'
    
    def forward ( self , input ) :
        """ `forward` should get for input, and returns, a tensor or a tuple of tensors """
        raise NotImplementedError
        
    def backward ( self , gradwrtoutput ) :
        """
        `backward` should get as input a tensor or a tuple of tensors containing the gradient of the loss 
        with respect to the module’s output, accumulate the gradient wrt the parameters, and return a 
        tensor or a tuple of tensors containing the gradient of the loss wrt the module’s input.
        """
        raise NotImplementedError
        
    def param ( self ) :
        """ 
        `param` should return a list of pairs, each composed of a parameter tensor, and a gradient tensor 
        of same size. This list should be empty for parameterless modules (e.g. activation functions). 
        """
        return []

In [60]:
class Linear(Module):
    
    def __init__(self, input_dim, output_dim, std=1):
        super().__init__()
        torch.manual_seed(123)
        self.weight = empty(output_dim, input_dim).normal_(0, std)
        self.bias = empty(output_dim).normal_(0, std)
        self.dw = empty(self.weight.size()).zero_()
        self.db = empty(self.bias.size()).zero_()
        self.x = 0
         
    def forward(self, input_):
        self.x = input_
        return self.weight.mv(self.x) + self.bias
    
    def backward(self, grdwrtoutput):
        self.dw.add_(grdwrtoutput.view(-1,1).mm(self.x.view(1,-1)))
        self.db.add_(grdwrtoutput)
        return self.weight.t().mv(grdwrtoutput)
    
    def param (self):
        return [(self.weight, self.dw), (self.bias, self.db)]

In [8]:
class ReLu(Module):
    
    def __init__(self):
        super().__init__()
        self.input_ = 0
        
    def forward(self, input_):
        self.input_ = input_
        result = self.input_ * (self.input_ > 0).float()
        return result
    
    def backward(self, grdwrtoutput):
        return grdwrtoutput * (self.input_ > 0).float()    

    def param (self):
        return [(None, None)]  

In [9]:
class Tanh(Module):
    
    def __init__(self):
        super().__init__()
        self.input_ = 0
        
    def forward(self, input_):
        self.input_ = input_
        tanh = 2/(1+(-2*input_).exp())-1
        return tanh
    
    def backward(self, grdwrtoutput):
        result = 4*((self.input_.exp() + (-self.input_).exp()).pow(-2)) * grdwrtoutput
        return result
    
    def param (self):
        return [(None, None)]

In [10]:
class Sequential(Module):
    
    def __init__(self, *args):
        super().__init__()
        self.modules = []
        args = list(args)[0]
        for module in args:
            self.modules.append(module)
    
    def forward(self, input_):
        result = input_
        for module in self.modules:
            result = module.forward(result)
        return result
    
    def backward(self, grdwrtoutput):
        modules_re = self.modules[::-1]
        result = grdwrtoutput
        for module in modules_re:
            result = module.backward(result)
        return result
    
    def param ( self ) :
        parameters = []
        for module in self.modules:
            parameters.append(module.param())
        return parameters

In [11]:
def loss(pred, label):
    return (pred - label.float()).pow(2).sum()

def dloss(pred,label):
    return 2*(pred - label.float())

In [12]:
class SGD():
    def __init__(self, param, lr):
        self.param = param
        self.lr = lr
    
    def doit(self):
        for p in self.param:
            for tup in p:
                val, grad = tup
                if (val is None or grad is None):
                    continue
                val.add_(-self.lr * grad)
    def zero_grad(self):
        for p in self.param:
            for tup in p:
                val, grad = tup
                if (val is None or grad is None):
                    continue
                grad.zero_()

In [185]:
def train_model(model, train_inputs, train_labels, validation_inputs, validation_labels, lr, epochs, batch_size = 1):
    """
    Trains the model, logs training- and validation error

    Output:
    model       :  Sequential object
    train error :  List object 
    validation error  :  List object 
    """   
    # make train targets and validation targets to 1-hot vector
    train_labels_conv = convert_labels(train_labels)
    validation_labels_conv = convert_labels(validation_labels)    
    
    
    # define optimizer
    sgd = SGD(model.param(), lr=lr)
    
    # constants
    nb_train_samples = train_inputs.size(0)
    nb_classes = train_labels_conv.size(1)
    input_dim = train_inputs.size(1)
    nb_batches = int(nb_train_samples/batch_size)
    assert nb_train_samples%batch_size == 0#要求nb_train_samples必须被batch size整除
    
    # training in epochs
    train_error_list = []
    validation_error_list = []


    for epoch in range(epochs):

        # Training -------------------------------------------------------------------------------
        acc_loss = 0
        nb_train_errors = 0
        for i in range(nb_batches):
            dl_loss = 0
            # iterate through samples and accumelate derivatives
            for n in range(i*batch_size, (i+1)*batch_size):
                ### In order to get nb_train_errors, check how many correctly classified

                # Get index of correct one, by taking argmax
                a_train_lable = train_labels_conv[n]
                train_labels_list = [a_train_lable[0], a_train_lable[1]]


                output = model.forward(train_inputs[n])

                # Get index of the predicted of the two outputs, by taking argmax
                output_list = [output[0], output[1]]

                prediction = output_list.index(max(output_list))#哪个值大，就predict sample作为哪个class

                # Check if predicted correctly
                if int(train_labels[n]) != int(prediction) : nb_train_errors += 1


                ### Calculate loss 
                acc_loss = acc_loss + loss(output, train_labels_conv[n].float())#Calculate overall loss
                dl_dloss = dl_loss+dloss(output, train_labels_conv[n].float()) 
            model.backward(dl_dloss/batch_size)
            sgd.doit()
            sgd.zero_grad()
        train_error_list.append((100 * nb_train_errors) / train_inputs.size(0))

        # validationing --------------------------------------------------------------------------------
        nb_validation_errors = 0

        for n in range(0, validation_inputs.size(0)):


            ### In order to get nb_train_errors, check how many correctly classified

            a_validation_label = validation_labels_conv[n]
            validation_labels_list = [a_validation_label[0], a_validation_label[1]]

            ### Find which one is predicted of the two outputs, by taking argmax            

            output = model.forward(validation_inputs[n])
            output_list = [output[0], output[1]]
            prediction = output_list.index(max(output_list))
            if int(validation_labels[n]) != int(prediction) : nb_validation_errors += 1


        if epoch%(epochs/10) == 0:
            print('{:d} train_loss {:.02f} acc_train_error {:.02f}% validation_error {:.02f}%'
              .format(epoch,
                      acc_loss,
                      (100 * nb_train_errors) / train_inputs.size(0),
                      (100 * nb_validation_errors) / validation_inputs.size(0)))
        validation_error_list.append((100 * nb_validation_errors) / validation_inputs.size(0))

    return model, train_error_list, validation_error_list


In [156]:
inputs, labels = generate_data(n=1000)
train_inputs, train_labels, validation_inputs, validation_labels, test_inputs, test_labels = \
split_dataset(inputs, labels, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2) 


In [192]:
### Normalize data
mu, std = inputs.mean(), inputs.std()
train_inputs.sub_(mu).div_(std)
validation_inputs.sub_(mu).div_(std)
test_inputs.sub_(mu).div_(std)


### Create model
input_dim = 2
hidden_width = 25
output_dim = 2

model = Sequential([Linear(input_dim, hidden_width), ReLu(), Linear(hidden_width, hidden_width), ReLu(), Linear(hidden_width, hidden_width), Tanh(), Linear(hidden_width, output_dim), Tanh()])

### Train model and log training and validation error
model, train_error_list, test_error_list = train_model(model, train_inputs, train_labels, validation_inputs, validation_labels, 
                                                       lr = 0.0001, epochs=1500, batch_size=5)

### Print final training error
print('train_error {:.02f}%'.format(train_error_list[-1]))


0 log_train_loss 2804.10 acc_train_error 62.57% validation_error 60.00%
150 log_train_loss 2171.80 acc_train_error 55.57% validation_error 53.00%
300 log_train_loss 1808.71 acc_train_error 43.00% validation_error 32.00%
450 log_train_loss 1690.33 acc_train_error 39.57% validation_error 29.00%
600 log_train_loss 1600.04 acc_train_error 36.71% validation_error 29.00%
750 log_train_loss 1545.93 acc_train_error 34.00% validation_error 25.00%
900 log_train_loss 1518.72 acc_train_error 34.00% validation_error 23.00%
1050 log_train_loss 1489.29 acc_train_error 34.00% validation_error 22.00%
1200 log_train_loss 1442.21 acc_train_error 32.86% validation_error 20.00%
1350 log_train_loss 1335.88 acc_train_error 31.57% validation_error 22.00%
train_error 31.14%


In [16]:
def test_model(model, test_inputs, test_targets):
    """
    Test the model and prints the test error
    """   
    
    # make test targets to 1-hot vector
    test_targets = convert_to_one_hot_labels(test_inputs, test_targets)    
    
    test_error_list = []
    
    nb_test_errors = 0

    for n in range(0, test_inputs.size(0)):


        ### In order to get nb_train_errors, check how many correctly classified
        a_test_target = test_targets[n]
        test_targets_list = [a_test_target[0], a_test_target[1]]
        correct = test_targets_list.index(max(test_targets_list)) # argmax

        ### Find which one is predicted of the two outputs, by taking argmax            
        output = model.forward(test_inputs[n])#model只能允许sample一个一个进去嘛
        output_list = [output[0], output[1]]
        prediction = output_list.index(max(output_list))
        if int(correct) != int(prediction) : nb_test_errors += 1


    print('test_error {:.02f}%'.format(((100 * nb_test_errors) / test_inputs.size(0))))
    test_error_list.append((100 * nb_test_errors) / test_inputs.size(0))
    return

In [190]:
def train_model2(train_inputs, train_targets, test_inputs, test_targets, model, learning_rate=0.001, epochs=100, batch_size=1):
    """
    Trains the model, logs training- and validation error

    Output:
    model       :  Sequential object
    train error :  List object 
    test error  :  List object 
    """   
    # make train targets and test targets to 1-hot vector
    train_targets = convert_labels(train_targets)
    test_targets = convert_labels(test_targets)    
    
    
    # define optimizer
    sgd = SGD(model.param(), lr=learning_rate)
    
    # constants
    nb_train_samples = train_inputs.size(0)
    nb_classes = train_targets.size(1)
    input_dim = train_inputs.size(1)
    
    
    # training in epochs
    test_error_list = []
    train_error_list = []

    for epoch in range(epochs):
        
        # Training -------------------------------------------------------------------------------
        acc_loss = 0
        nb_train_errors = 0
        # iterate through samples and accumelate derivatives
        for b in range(0, nb_train_samples, batch_size):
            dl_loss = 0
            for n in range(b, b+batch_size):
                # clear gradiants 1.(outside loop with samples = GD) 2.(inside loop with samples = SGD)
                

                ### In order to get nb_train_errors, check how many correctly classified

                # Get index of correct one, by taking argmax
                a_train_target = train_targets[n]
                train_targets_list = [a_train_target[0], a_train_target[1]]
                correct = train_targets_list.index(max(train_targets_list))


                output = model.forward(train_inputs[n])

                # Get index of the predicted of the two outputs, by taking argmax
                output_list = [output[0], output[1]]

                prediction = output_list.index(max(output_list))

                # Check if predicted correctly
                if int(correct) != int(prediction) : nb_train_errors += 1


                ### Calculate loss 
                acc_loss = acc_loss + loss(output, train_targets[n].float())
                dl_dloss = dl_loss+dloss(output, train_targets[n].float())


            model.backward(dl_dloss/batch_size)
            sgd.doit()
            sgd.zero_grad()
            
        train_error_list.append((100 * nb_train_errors) / train_inputs.size(0))
        
        # Testing --------------------------------------------------------------------------------
        nb_test_errors = 0
        
        for n in range(0, test_inputs.size(0)):
            
            
            ### In order to get nb_train_errors, check how many correctly classified
            
            a_test_target = test_targets[n]
            test_targets_list = [a_test_target[0], a_test_target[1]]
            correct = test_targets_list.index(max(test_targets_list)) # argmax
            
            ### Find which one is predicted of the two outputs, by taking argmax            
            
            output = model.forward(test_inputs[n])
            output_list = [output[0], output[1]]
            prediction = output_list.index(max(output_list))
            if int(correct) != int(prediction) : nb_test_errors += 1
                

        if epoch%(epochs/20) == 0:
            print('{:d} acc_train_loss {:.02f} acc_train_error {:.02f}% validation_error {:.02f}%'
              .format(epoch,
                      acc_loss,
                      (100 * nb_train_errors) / train_inputs.size(0),
                      (100 * nb_test_errors) / test_inputs.size(0)))
        test_error_list.append((100 * nb_test_errors) / test_inputs.size(0))

    return model, train_error_list, test_error_list

In [191]:
### Generate data
inputs, targets = generate_data(n=1000)


### Split the dataset into train, validation and test set
train_inputs, train_targets, validation_inputs, validation_targets, test_inputs, test_targets = split_dataset(inputs, targets)


### Normalize data
mu, std = inputs.mean(), inputs.std()
train_inputs.sub_(mu).div_(std)
validation_inputs.sub_(mu).div_(std)
test_inputs.sub_(mu).div_(std)


### Create model
input_dim = 2
hidden_width = 25
output_dim = 2

model = Sequential([Linear(input_dim, hidden_width), ReLu(), Linear(hidden_width, hidden_width), ReLu(), Linear(hidden_width, hidden_width), Tanh(), Linear(hidden_width, output_dim), Tanh()])

### Train model and log training and validation error
model, train_error_list, test_error_list = train_model2(train_inputs, train_targets, validation_inputs, validation_targets, 
                                                    model, learning_rate = 0.001, epochs=1500, batch_size=5)

### Print final training error
print('train_error {:.02f}%'.format(train_error_list[-1]))

### Test error
# HL.test_model(model, test_inputs, test_targets)


0 acc_train_loss 2744.68 acc_train_error 62.00% validation_error 60.00%
75 acc_train_loss 1598.16 acc_train_error 35.29% validation_error 28.00%
150 acc_train_loss 1158.48 acc_train_error 28.43% validation_error 25.00%
225 acc_train_loss 1081.00 acc_train_error 26.43% validation_error 20.00%
300 acc_train_loss 840.36 acc_train_error 21.86% validation_error 17.00%
375 acc_train_loss 756.77 acc_train_error 20.86% validation_error 14.00%
450 acc_train_loss 727.38 acc_train_error 21.00% validation_error 14.00%
525 acc_train_loss 707.72 acc_train_error 21.29% validation_error 14.00%
600 acc_train_loss 667.01 acc_train_error 17.86% validation_error 13.00%
675 acc_train_loss 635.76 acc_train_error 17.86% validation_error 11.00%
750 acc_train_loss 624.02 acc_train_error 17.71% validation_error 11.00%
825 acc_train_loss 624.13 acc_train_error 17.71% validation_error 11.00%
900 acc_train_loss 620.21 acc_train_error 17.86% validation_error 11.00%
975 acc_train_loss 615.80 acc_train_error 17.43% v