In [95]:
import numpy as np
import cPickle, gzip

from neon import logger as neon_logger
from neon.callbacks.callbacks import Callbacks
from neon.data import MNIST
from neon.initializers import Gaussian, GlorotUniform
from neon.layers import Affine, BranchNode, Multicost, Tree, GeneralizedCost, SingleOutputTree, MergeMultistream
from neon.models import Model
from neon.optimizers import RMSProp
from neon.transforms import Rectlin, Logistic, Softmax
from neon.transforms import CrossEntropyMulti, Misclassification, PrecisionRecall
from neon.util.argparser import NeonArgparser
from neon.data import ArrayIterator
from neon.backends import gen_backend

In [96]:
from neon.backends import gen_backend
be = gen_backend(backend='cpu',batch_size=10)

In [97]:
from neon.data import NervanaDataIterator
import numpy as np
import cPickle
import os

class SVHN(NervanaDataIterator):

    def __init__(self, X, Y, nclass):
        
        def onehot_gen(z, s, n):
            b = np.zeros((s, n))
            b[np.arange(s), z] = 1
            return b
        # Load the numpy data into some variables. We divide the image by 255 to normalize the values
        # between 0 and 1.
        self.X = X
        self.Y = Y
        self.nclass_t1 = nclass[0]
        self.nclass_t2 = nclass[1]
        self.nclass_t3 = nclass[2]
        self.shape = [x.shape[1] for x in X]
#         self.shape = lshape  # shape of the input data (e.g. for images, (C, H, W))

        # 1. assign some required and useful attributes
        self.start = 0  # start at zero
        self.ndata = self.X[0].shape[0]  # number of images in X (hint: use X.shape)
        self.nfeatures = self.X[0].shape[1]  # number of features in X (hint: use X.shape)
        self.Y_t1 = onehot_gen(self.Y[0], self.ndata, self.nclass_t1)
        self.Y_t2 = onehot_gen(self.Y[1], self.ndata, self.nclass_t2)
        self.Y_t3 = onehot_gen(self.Y[2], self.ndata, self.nclass_t3)

        # number of minibatches per epoch
        # to calculate this, use the batchsize, which is stored in self.be.bsz
        self.nbatches = self.ndata/self.be.bsz 
        
        
        # 2. allocate memory on the CPU for a minibatch's worth of data.
        # (e.g. use `self.be` to access the backend.). See the backend documentation.
        # to get the minibatch size, use self.be.bsz
        # hint: X should have shape (# features, mini-batch size)
        # hint: use some of the attributes previously defined above
        self.dev_X_t1 = self.be.zeros((self.nfeatures, self.be.bsz))
        self.dev_X_t2 = self.be.zeros((self.nfeatures, self.be.bsz))
        self.dev_X_t3 = self.be.zeros((self.nfeatures, self.be.bsz))
        self.dev_Y_t1 = self.be.zeros((self.Y_t1.shape[1], self.be.bsz))
        self.dev_Y_t2 = self.be.zeros((self.Y_t2.shape[1], self.be.bsz))
        self.dev_Y_t3 = self.be.zeros((self.Y_t3.shape[1], self.be.bsz))


    def reset(self):
        self.start = 0
           
    def __iter__(self):
        # 3. loop through minibatches in the dataset
        for index in range(self.start, self.ndata, self.be.bsz):
            # 3a. grab the right slice from the numpy arrays
            inputs_t1 = self.X[0][index:(index + self.be.bsz), :]
            targets_t1 = self.Y_t1[index:(index + self.be.bsz), :]
            inputs_t2 = self.X[1][index:(index + self.be.bsz), :]
            targets_t2 = self.Y_t2[index:(index + self.be.bsz), :]
            inputs_t3 = self.X[2][index:(index + self.be.bsz), :]
            targets_t3 = self.Y_t3[index:(index + self.be.bsz), :]
            
            
            # The arrays X and Y data are in shape (batch_size, num_features),
            # but the iterator needs to return data with shape (num_features, batch_size).
            # here we transpose the data, and then store it as a contiguous array. 
            # numpy arrays need to be contiguous before being loaded onto the GPU.
            inputs_t1 = np.ascontiguousarray(inputs_t1.T)
            targets_t1 = np.ascontiguousarray(targets_t1.T)
            inputs_t2 = np.ascontiguousarray(inputs_t2.T)
            targets_t2 = np.ascontiguousarray(targets_t2.T)
            inputs_t3 = np.ascontiguousarray(inputs_t3.T)
            targets_t3 = np.ascontiguousarray(targets_t3.T)
                        
            # here we test your implementation
            # your slice has to have the same shape as the GPU tensors you allocated
#             assert inputs[0].shape == self.dev_X_t1.shape, \
#                    "inputs has shape {}, but dev_X is {}".format(inputs.shape, self.dev_X.shape)
#             assert targets[0].shape == self.dev_Y_t1.shape, \
#                    "targets has shape {}, but dev_Y is {}".format(targets.shape, self.dev_Y.shape)
            
            # 3b. transfer from numpy arrays to device
            # - use the GPU memory buffers allocated previously,
            #    and call the myTensorBuffer.set() function. 
            self.dev_X_t1.set(inputs_t1)
            self.dev_Y_t1.set(targets_t1)
            self.dev_X_t2.set(inputs_t2)
            self.dev_Y_t2.set(targets_t2)
            self.dev_X_t3.set(inputs_t3)
            self.dev_Y_t3.set(targets_t3)
            
            # 3c. yield a tuple of the device tensors.
            # X should be of shape (num_features, batch_size)
            # Y should be of shape (4, batch_size)
            yield ((self.dev_X_t1, self.dev_X_t2, self.dev_X_t3 ), (self.dev_Y_t1, self.dev_Y_t2, self.dev_Y_t3))

In [98]:
# Set up a number of initial variables for use with baseline
NUM_TASKS = 3; # number of learning tasks (for multi-task learning)
NUM_FOLDS = 10; # number of folds for training (main cross validation loop)
NUM_EPOCH = 5; # number of epochs



truth_a_arr = []
pred_a_arr = []

truth_b_arr = []
pred_b_arr = []

truth_c_arr = []
pred_c_arr = []


In [99]:
for fold in range( NUM_FOLDS ):

    features_train = []
    labels_train = []
    truths_train = []

    features_test = []
    labels_test = []
    truths_test = []

    n_out = []

    for task in range( NUM_TASKS ):
        file_post = '.' + str(task) + '.' + str(fold) + '.pkl.gz'
        fname_train = 'train/train' + file_post; 
        fname_test  = 'test/test' + file_post; 
        with gzip.open( fname_train, 'rb' ) as f:
            feature_train, label_train = cPickle.load( f )

        with gzip.open( fname_test, 'rb') as f:
            feature_test, label_test = cPickle.load( f )

        features_train.append( feature_train )
        labels_train.append( label_train )

        features_test.append( feature_test )
        labels_test.append( label_test )

        mv = np.max( label_train )
        truth_train = np.zeros( ( len( label_train ), mv + 1 ) )
        for i in range( len( label_train ) ):
            truth_train[ i, label_train[ i ] ] = 1

        truths_train.append( truth_train )

        mv = np.max( label_test )
        truth_test = np.zeros( ( len( label_test ), mv + 1 ) )
        for i in range( len( label_test ) ):
            truth_test[ i, label_test[ i ] ] = 1

        truths_test.append( truth_test )

        n_out.append( mv + 1 )

    flen = len( feature_train[ 0 ] ); # input feature length is set to 400 for now based on the training examples available.


In [100]:
test_labels = [array[0:45] for array in labels_test]
test_features = [array[0:45] for array in features_test]

In [101]:
train = SVHN(X = features_train, Y = labels_train, nclass=n_out)

In [102]:
test = SVHN(X = test_features, Y = test_labels, nclass=n_out)

In [103]:
b1 = BranchNode(name='b1')
init = GlorotUniform()

In [104]:
t1_input = [Affine(nout=flen, name="t1_input", activation=Rectlin(), init=init)]
t2_input = [Affine(nout=flen, name="t2_input", activation=Rectlin(), init=init)]
t3_input = [Affine(nout=flen, name="t3_input", activation=Rectlin(), init=init)]
p1 = [MergeMultistream(layers = [t1_input, t2_input, t3_input], merge='stack'),
         Affine(nout=flen, name="share", activation=Rectlin(), init=init),
         b1,
         Affine(nout=flen, name="t1_3", activation=Rectlin(), init=init),
         Affine(nout=256, name="t1_4", activation=Rectlin(), init=init),
         Affine(nout=n_out[0], name="t2_out", activation=Rectlin(), init=init)]

p2 = [b1,
      Affine(nout=flen, name="t2_3", activation=Rectlin(), init=init),
      Affine(nout=256, name="t2_4", activation=Rectlin(), init=init),
      Affine(nout=n_out[1], name="t2_out", activation=Rectlin(), init=init)]
p3 = [b1,
      Affine(nout=flen, name="t3_3", activation=Rectlin(), init=init),
      Affine(nout=256, name="t3_4", activation=Rectlin(), init=init),
      Affine(nout=n_out[2], name="t3_out", activation=Rectlin(), init=init)]
model = Model(layers=Tree([p1, p2, p3]))

In [105]:
optimizer = RMSProp(learning_rate=0.001)
cost = Multicost(costs=[GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)),
                       GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True)),
                       GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))])
                        
# callbacks_1 = Callbacks(model_1, eval_set=test_t1, eval_freq=NUM_EPOCH)
# callbacks_2 = Callbacks(model_2, eval_set=test_t2, eval_freq=NUM_EPOCH)
# callbacks_3 = Callbacks(model_3, eval_set=test_t3, eval_freq=NUM_EPOCH)

callbacks_1 = Callbacks(model, eval_set=train, eval_freq=NUM_EPOCH)

In [106]:
cost1 = CrossEntropyMulti(usebits=True)

In [107]:
model.fit(train, optimizer=optimizer, cost=cost, callbacks=callbacks_1, 
            num_epochs=NUM_EPOCH)

Epoch 0   [Train |████████████████████|  120/120  batches, (72.13, 72.13, 72.13) costs, 3.47s]
Epoch 1   [Train |████████████████████|  120/120  batches, (72.13, 72.13, 72.13) costs, 3.50s]
Epoch 2   [Train |████████████████████|  120/120  batches, (0.01, 72.13, 72.13) costs, 3.52s]
Epoch 3   [Train |████████████████████|  120/120  batches, (0.01, 72.13, 0.02) costs, 3.50s]
Epoch 4   [Train |████████████████████|  120/120  batches, (0.01, 72.13, 0.01) costs, 3.48s] [CrossEntropyMulti Loss 36.01, 0.52s]


In [94]:
testcost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
model.get_outputs(test)

AttributeError: 'list' object has no attribute 'shape'