In [1]:
import torch
from torchvision import datasets

In [2]:
import matplotlib.pyplot as plt
import torch
import numpy as np
from utils import labels2one_hot
from generate_data import generate_data, generate_grid_data
from modules import Dropout
from modules import Linear, Sequential
from modules.losses import LossCrossEntropy
from optimizers import SGD

In [3]:
def convert_to_one_hot_labels(input, target, num_classes=None, off_value=-1):
    """Encode integer class labels as a dense (samples, classes) matrix.

    The "hot" entry of every row is 1.0 and all other entries are
    ``off_value``. With the default ``off_value=-1`` this reproduces the
    historical -1/+1 encoding of this helper (it is *not* a 0/1 one-hot
    matrix unless ``off_value=0`` is passed).

    Args:
        input: Tensor used only as a template; the result is created with
            ``input.new_full`` and therefore shares its dtype and device.
        target: Tensor of class indices with one label per sample along
            dimension 0.
        num_classes: Number of columns of the result. When ``None`` it is
            inferred as ``target.max() + 1``; pass it explicitly so that
            different splits (e.g. train and test) get the same width even if
            one of them lacks the highest class.
        off_value: Value written to every non-target entry.

    Returns:
        Tensor of shape ``(target.size(0), num_classes)``.
    """
    n_samples = target.size(0)

    if num_classes is None:
        # int(...) turns the 0-dim tensor returned by max() into a plain
        # Python int, which is what the size argument expects. An empty
        # target has no maximum, so fall back to zero columns.
        num_classes = int(target.max()) + 1 if target.numel() > 0 else 0

    tmp = input.new_full((n_samples, num_classes), off_value)

    # Nothing to scatter for an empty batch.
    if n_samples > 0:
        # scatter_ needs int64 indices; .long() is a no-op for LongTensors.
        tmp.scatter_(1, target.view(-1, 1).long(), 1.0)

    return tmp

def _dataset_arrays(dataset, legacy_data_attr, legacy_labels_attr):
    """Return ``(images, labels)`` of a torchvision dataset across versions.

    Newer torchvision exposes ``data`` / ``targets``; older releases used
    split-specific names such as ``train_data`` / ``train_labels``. The new
    names are tried first and the legacy ones are used as a fallback.
    """
    images = getattr(dataset, 'data', None)
    if images is None:
        images = getattr(dataset, legacy_data_attr)

    labels = getattr(dataset, 'targets', None)
    if labels is None:
        labels = getattr(dataset, legacy_labels_attr)

    return images, labels


def _cifar_images_to_float(images):
    """Convert a CIFAR numpy image array to a float tensor, channels first."""
    tensor = torch.from_numpy(images)
    # Channels-last layout (N, H, W, 3): move the channel axis to position 1.
    # Arrays that are already channels-first are left as they are.
    if tensor.size(3) == 3:
        tensor = tensor.transpose(3, 1).transpose(2, 3)
    return tensor.float()


def load_data(one_hot_labels = False, normalize = False, flatten = True, data_dir = None, cifar = False, full = True, tiny = False):
    """Download (if needed) and load MNIST or CIFAR-10 as tensors.

    Args:
        one_hot_labels: If true, both target tensors are converted with
            ``convert_to_one_hot_labels``.
        normalize: If true, both inputs are shifted and scaled in place using
            the mean and standard deviation of the (possibly reduced)
            training inputs.
        flatten: If true, every sample is flattened to a single vector.
        data_dir: Root directory for the datasets; ``None`` means ``'./data'``.
        cifar: If true load CIFAR-10, otherwise MNIST.
        full: If true keep every sample.
        tiny: Only used when ``full`` is false: keep 500 train / 100 test
            samples instead of 1000 / 1000.

    Returns:
        ``(train_input, train_target, test_input, test_target)``.

    Raises:
        ValueError: If both ``full`` and ``tiny`` are true.
    """
    # Fail fast on contradictory flags, before any download is attempted.
    if full and tiny:
        raise ValueError('Cannot have both --full and --tiny')

    if data_dir is None:
        data_dir = './data'

    if cifar:
        print('* Using CIFAR')
        cifar_train_set = datasets.CIFAR10(data_dir + '/cifar10/', train = True, download = True)
        cifar_test_set = datasets.CIFAR10(data_dir + '/cifar10/', train = False, download = True)

        train_images, train_labels = _dataset_arrays(cifar_train_set, 'train_data', 'train_labels')
        test_images, test_labels = _dataset_arrays(cifar_test_set, 'test_data', 'test_labels')

        train_input = _cifar_images_to_float(train_images)
        train_target = torch.LongTensor(train_labels)
        test_input = _cifar_images_to_float(test_images)
        test_target = torch.LongTensor(test_labels)

    else:
        print('* Using MNIST')
        mnist_train_set = datasets.MNIST(data_dir + '/mnist/', train = True, download = True)
        mnist_test_set = datasets.MNIST(data_dir + '/mnist/', train = False, download = True)

        train_images, train_target = _dataset_arrays(mnist_train_set, 'train_data', 'train_labels')
        test_images, test_target = _dataset_arrays(mnist_test_set, 'test_data', 'test_labels')

        train_input = train_images.view(-1, 1, 28, 28).float()
        test_input = test_images.view(-1, 1, 28, 28).float()

    if flatten:
        # reshape (not view): the transposed CIFAR tensor may be
        # non-contiguous even after clone(), in which case view() raises.
        train_input = train_input.clone().reshape(train_input.size(0), -1)
        test_input = test_input.clone().reshape(test_input.size(0), -1)

    if not full:
        if tiny:
            print('** Reduce the data-set to the tiny setup')
            n_train, n_test = 500, 100
        else:
            print('** Reduce the data-set (use --full for the full thing)')
            n_train, n_test = 1000, 1000

        train_input = train_input.narrow(0, 0, n_train)
        train_target = train_target.narrow(0, 0, n_train)
        test_input = test_input.narrow(0, 0, n_test)
        test_target = test_target.narrow(0, 0, n_test)

    print('** Use {:d} train and {:d} test samples'.format(train_input.size(0), test_input.size(0)))

    if one_hot_labels:
        # NOTE(review): each split is encoded independently, so the number of
        # columns is derived from that split's own labels; a reduced split
        # missing the highest class would get a narrower matrix.
        train_target = convert_to_one_hot_labels(train_input, train_target)
        test_target = convert_to_one_hot_labels(test_input, test_target)

    if normalize:
        # Statistics come from the training inputs only and are applied to
        # both splits.
        mu, std = train_input.mean(), train_input.std()
        train_input.sub_(mu).div_(std)
        test_input.sub_(mu).div_(std)

    return train_input, train_target, test_input, test_target


In [4]:
# Load the complete MNIST split as flattened, normalised inputs; the targets
# are returned already encoded by convert_to_one_hot_labels.
train_input, train_target, test_input, test_target = load_data(
    one_hot_labels=True,
    normalize=True,
    flatten=True,
    data_dir=None,
    cifar=False,
    full=True,
    tiny=False,
)

* Using MNIST
** Use 60000 train and 10000 test samples


In [5]:
def default_net_1(x_all, y_all, num_of_neurons=(2, 25, 25, 25, 2), lr=0.1, momentum_coef=0.0, num_of_epochs=100,
                  dropout_prob=0.2, weight_decay=0.2, val_split=0.2):
    """Build and train a three-hidden-layer network with dropout.

    The model is a ``Sequential`` of three ReLU ``Linear`` layers, a
    ``Dropout`` layer and a softmax ``Linear`` output layer, trained with
    ``SGD`` on a cross-entropy loss.

    Args:
        x_all: Inputs handed to ``SGD.train``.
        y_all: Targets handed to ``SGD.train``.
        num_of_neurons: Layer widths as
            ``(input, hidden1, hidden2, hidden3, output)``.
        lr: First positional argument of ``SGD``.
        momentum_coef: Second positional argument of ``SGD``.
        num_of_epochs: Number of epochs passed to ``SGD.train``.
        dropout_prob: ``prob`` argument of the ``Dropout`` layer.
        weight_decay: ``weight_decay`` argument of ``SGD``.
        val_split: ``val_split`` argument of ``SGD.train`` (presumably the
            fraction held out for validation -- confirm against the
            optimizer implementation).

    Returns:
        The trained ``Sequential`` model, with ``model.loss`` set to the
        ``LossCrossEntropy`` instance used during training.
    """
    n_in, n_hidden1, n_hidden2, n_hidden3, n_out = num_of_neurons[:5]

    ce = LossCrossEntropy()

    model = Sequential()
    # Only the first layer is given an explicit input size; the following
    # layers are added without one.
    model.add(Linear(out=n_hidden1, input_size=n_in, activation='relu'))
    model.add(Linear(out=n_hidden2, activation='relu'))
    # Third hidden layer: previously this reused num_of_neurons[2], so the
    # fourth entry of the tuple was silently ignored.
    model.add(Linear(out=n_hidden3, activation='relu'))
    model.add(Dropout(prob=dropout_prob))
    model.add(Linear(out=n_out, activation='softmax'))

    model.loss = ce
    sgd = SGD(lr, momentum_coef, weight_decay=weight_decay)

    sgd.train(model, x_all, y_all, num_of_epochs, val_split=val_split)

    return model

In [6]:
# Train the 784-100-100-100-10 network on the MNIST tensors for 10 epochs and
# keep a handle on the loss object's log.
model = default_net_1(
    train_input,
    train_target,
    num_of_neurons=(784, 100, 100, 100, 10),
    lr=0.1,
    momentum_coef=0.0,
    num_of_epochs=10,
)
loss1 = model.loss.loss_logging

# NOTE(review): x_test / y_test are not used anywhere in this cell; this looks
# like a leftover from a 2-D toy experiment -- confirm before removing.
x_test, y_test = generate_grid_data(minn=0, maxx=1, num_of_points_per_dim=51)

# NOTE(review): test_target was loaded with one_hot_labels=True, so it is
# already a (samples, classes) matrix when it is passed to labels2one_hot
# here. The recorded run of this cell ended in an AssertionError; this double
# encoding is a likely cause -- verify against labels2one_hot / evaluate.
model.evaluate(
    test_input,
    labels2one_hot(test_target, val=0),
    return_pred=True,
)

prev: 100, current: 784
Added Module Name: 0_Linear 
Added Module Name: 1_ReLU 
prev: 100, current: 100
Added Module Name: 2_Linear 
Added Module Name: 3_ReLU 
prev: 100, current: 100
Added Module Name: 4_Linear 
Added Module Name: 5_ReLU 
Added Module Name: 6_Dropout 
prev: 10, current: 100
Added Module Name: 7_Linear 
Added Module Name: 8_Softmax 
tensor([[ 0.0911,  0.0923,  0.1048,  ...,  0.0969,  0.0953,  0.1160],
        [ 0.0848,  0.1034,  0.1107,  ...,  0.1066,  0.0936,  0.0984],
        [ 0.0893,  0.0896,  0.1058,  ...,  0.1095,  0.1021,  0.1106],
        ...,
        [ 0.0820,  0.1000,  0.0938,  ...,  0.1140,  0.1043,  0.1017],
        [ 0.0929,  0.0892,  0.1082,  ...,  0.1028,  0.0969,  0.1037],
        [ 0.0962,  0.0856,  0.1081,  ...,  0.0991,  0.0986,  0.1059]])


  return loss_val[0]  # TODO: handle this accordingly with take_avg false


tensor([[ 0.0869,  0.1038,  0.1037,  ...,  0.1074,  0.1003,  0.0967],
        [ 0.0919,  0.1022,  0.0987,  ...,  0.0965,  0.0938,  0.0949],
        [ 0.0898,  0.1017,  0.1086,  ...,  0.1069,  0.0946,  0.1066],
        ...,
        [ 0.0796,  0.1015,  0.0987,  ...,  0.1189,  0.0962,  0.1029],
        [ 0.0983,  0.0893,  0.0983,  ...,  0.0994,  0.0956,  0.1098],
        [ 0.0932,  0.0992,  0.1094,  ...,  0.0988,  0.0904,  0.1017]])
tensor([[ 0.0869,  0.1038,  0.1037,  ...,  0.1074,  0.1003,  0.0967],
        [ 0.0919,  0.1022,  0.0987,  ...,  0.0965,  0.0938,  0.0949],
        [ 0.0898,  0.1017,  0.1086,  ...,  0.1069,  0.0946,  0.1066],
        ...,
        [ 0.0796,  0.1015,  0.0987,  ...,  0.1189,  0.0962,  0.1029],
        [ 0.0983,  0.0893,  0.0983,  ...,  0.0994,  0.0956,  0.1098],
        [ 0.0932,  0.0992,  0.1094,  ...,  0.0988,  0.0904,  0.1017]])
tensor([[ 0.0899,  0.0970,  0.1042,  ...,  0.1045,  0.1021,  0.1099],
        [ 0.0926,  0.1022,  0.1051,  ...,  0.0967,  0.0883,  0

        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000]])
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  1.0000,  0.0000],
        [ 1.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000],
        ...,
        [ 1.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000]])
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  1.0000,  0.0000],
        [ 1.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000],
        ...,
        [ 1.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  1.0000]])
tensor([[ 0.,  0.,  0.,  ...,  0.,  0.,  0.],
        [ 0.,  

AssertionError: 