In [1]:
import numpy as np
import copy
import matplotlib.pyplot as plt

%matplotlib inline


In [18]:
class Blob:
    '''
    Pairs a value array (data_) with an equally shaped gradient array (diff_).

    is_update_ tells update() whether it is allowed to modify data_.
    '''

    def __init__(self):
        # Both arrays stay unset until setup() is called or a caller assigns them.
        self.data_ = None
        self.diff_ = None
        self.is_update_ = True

    def setup(self, shape):
        '''
        Allocate data_ and diff_ as float arrays of the given shape.

        shape must have one or two dimensions. The arrays come from np.empty,
        so their contents are uninitialized until something writes to them.
        '''
        rank = len(shape)
        assert rank in (1, 2), 'the number of arguments should either be 1 or 2'
        self.data_ = np.empty(shape, dtype=float)
        self.diff_ = np.empty(shape, dtype=float)

    def shape(self):
        '''Return the shape of data_ after checking that diff_ has the same shape.'''
        data_shape = self.data_.shape
        assert data_shape == self.diff_.shape, 'the shape of data_ does not match with the shape of diff_'
        return data_shape

    def update(self):
        '''Subtract diff_ from data_ in place; does nothing when is_update_ is false.'''
        if not self.is_update_:
            return
        assert self.data_.shape == self.diff_.shape, 'the shape of data_ does not match with the shape of diff_'
        self.data_ -= self.diff_

In [224]:
class Filler:
    '''
    Creates initialized parameter blobs.

    param is a dict with a 'type' key:
      'gaussian' - zero-mean normal samples scaled by param['std'] (default 0.01)
      'constant' - every entry equal to param['value'] (default 0.0)
    '''

    def __init__(self, param):
        # Keep a private copy so later edits to the caller's dict cannot change this filler.
        self.param_ = copy.deepcopy(param)
        self.type_ = self.param_['type']
        return

    def fill(self, shape):
        '''
        Return a new Blob whose data_ has the given shape and is filled
        according to the configured type. diff_ is set to zeros of the same
        shape so the blob is consistent (Blob.shape() / Blob.update() work)
        even before any backward pass has written a gradient.

        shape: any sequence of dimension sizes (tuple, list or 1-D array).
        Raises ValueError for a filler type that is not supported.
        '''
        # Normalise to a tuple of ints so lists and numpy arrays are accepted as well.
        shape = tuple(int(dim) for dim in shape)
        weight = Blob()
        if self.type_ == 'gaussian':
            std = self.param_.get('std', 0.01)
            weight.data_ = np.random.standard_normal(shape) * std
        elif self.type_ == 'constant':
            value = self.param_.get('value', 0.0)
            weight.data_ = np.full(shape, value, dtype=float)
        else:
            # Previously an unknown type silently produced a blob with data_ = None.
            raise ValueError('unknown filler type %r' % (self.type_,))
        weight.diff_ = np.zeros(shape, dtype=float)
        return weight
    
# param = {'type': 'gaussian', 'std': 0.1}
# filler = Filler(param)
# w = filler.fill(np.array([20, 30]))
# plt.hist(w.data_.reshape((-1)), bins=20)


# param = {'type': 'constant'}
# filler = Filler(param)
# w = filler.fill(np.array([20]))
# w

In [225]:
# Registry mapping a layer type name to the class that builds it.
layer_creator = {}


def layer_register(layer_name, layer):
    '''
    Record layer under layer_name in layer_creator.

    A name can only be registered once; registering it again trips the assertion.
    '''
    already_known = layer_name in layer_creator
    assert not already_known, 'layer %s has already been defined'%layer_name
    layer_creator[layer_name] = layer

In [226]:
class TxtReader:
    '''
    Reads numeric entries from a text file, one entry per line.

    A file whose first line contains commas is treated as multi-valued
    (multiple_label_ is True) and every line is parsed into a 1-D array;
    otherwise every line holds a single number.

    Very inefficient: each call re-opens the file, and fetch_data reads every
    line before selecting the requested ones.
    '''
    def __init__(self, file_path):
        self.file_path_ = file_path
        with open(self.file_path_, 'r') as fp:
            first_line = fp.readline()
        # Only the first line decides the layout for the whole file.
        self.multiple_label_ = len(first_line.split(',')) != 1
        return

    def _parse_line(self, line):
        '''Convert one text line into a 1-D array (multi-valued) or a 0-d array (single value).'''
        if self.multiple_label_:
            # A list comprehension (not map) so the result is numeric on Python 3 as well.
            return np.array([float(item) for item in line.split(',')])
        return np.array(float(line.strip()))

    def fetch_data(self, start_line, end_line=None):
        '''
        Return parsed entries from the file.

        fetch_data(start_line, end_line) returns the lines in the half-open range
        [start_line, end_line) stacked into one array: 2-D for multi-valued
        files, 1-D for single-valued files.

        fetch_data(line_num) returns just that one line, parsed like peek().
        This form used to be defined as a separate method of the same name and
        was silently replaced by the two-argument version.
        '''
        with open(self.file_path_, 'r') as fp:
            lines = fp.readlines()
        if end_line is None:
            return self._parse_line(lines[start_line])
        return np.array([self._parse_line(line) for line in lines[start_line: end_line]])

    def peek(self):
        '''Return the first entry of the file without reading the remaining lines.'''
        with open(self.file_path_, 'r') as fp:
            line = fp.readline()
        return self._parse_line(line)
    
# a = TxtReader('./iris/labels.dat')
# print a.multiple_label_
# a.fetch_data(0, 2).shape

if 'TxtData' in layer_creator:
    del layer_creator['TxtData']

class TxtDataLayer:
    '''
    TODO: support none value in feature file, support shuffle
    Data layer for inputs stored in text format.
    Expects two txt files, one for features and one for labels, with one
    line per entry. Features on a line are separated by commas.
    Only a single label per entry is supported.
    '''
    def __init__(self, param):
        '''{'feature_file': 'path/to/feature/file', 'label_file': 'path/to/label/file', 'batch_size': batch_size, 'num_entries': num_entries}'''
        self.blobs_ = None  # a data layer has no learnable parameters
        self.batch_size_ = param['batch_size']
        self.feature_reader_ = TxtReader(param['feature_file'])
        self.label_reader_ = TxtReader(param['label_file'])

        self.idx_ = 0 # index of the next entry to read
        self.num_entries_ = param['num_entries'] # the number of entries in the feature file
        return

    def setup(self, bottoms, tops):
        '''
        Size the output blobs: tops[0] holds features of shape
        (batch_size, num_features), tops[1] holds labels of shape (batch_size,).
        Both are marked as not updatable because they carry input data.
        '''
        assert len(tops) == 2, 'layer TxtDataLayer only support tops of length 2'
        num_features = self.feature_reader_.peek().shape[0]
        tops[0].setup((self.batch_size_, num_features))
        tops[1].setup((self.batch_size_, ))
        tops[0].is_update_ = False
        tops[1].is_update_ = False
        return

    def forward(self, bottoms, tops):
        '''
        Copy the next batch_size_ entries into tops[0].data_ (features) and
        tops[1].data_ (labels), wrapping around to the start of the files
        when the end is reached. idx_ is advanced to the entry after the batch.
        '''
        # storing data in text files is highly inefficient; a database like lmdb or leveldb would be better
        end = self.idx_ + self.batch_size_
        # Floor division keeps num_loops an int regardless of the division
        # semantics in effect; a float here would break the wrap-around logic.
        num_loops = end // self.num_entries_
        remainder = end % self.num_entries_

        # Half-open (start, stop) line ranges that together make up one batch.
        if num_loops == 0:
            loops = [(self.idx_, end)]
        else:
            loops = [(self.idx_, self.num_entries_)]
            # full extra passes over the file when the batch is larger than the file
            loops.extend([(0, self.num_entries_)] * (num_loops - 1))
            if remainder != 0:
                loops.append((0, remainder))
        self.idx_ = remainder

        batch_idx = 0
        for start, stop in loops:
            count = stop - start
            tops[0].data_[batch_idx: batch_idx + count] = self.feature_reader_.fetch_data(start, stop)
            tops[1].data_[batch_idx: batch_idx + count] = self.label_reader_.fetch_data(start, stop)
            batch_idx += count
        return

    def backward(self, bottoms, tops):
        '''Nothing to propagate: this layer has no bottoms.'''
        return

layer_register('TxtData', TxtDataLayer)
# txt_data_param = {'batch_size': 50, 'feature_file': './iris/features.dat', 'label_file': './iris/labels.dat', 'num_entries': 150}
# tdl = TxtDataLayer(txt_data_param)

# fb, lb = Blob(), Blob()
# tops = [fb, lb]
# bottoms = None

# tdl.setup(bottoms, tops)
# tdl.forward(bottoms, tops)
# print tops[0].data_[:2]
# print tops[1].data_[:2]
# tdl.forward(bottoms, tops)
# print tops[0].data_[:2]
# print tops[1].data_[:2]

In [227]:
if 'InnerProduct' in layer_creator:
    del layer_creator['InnerProduct']

class InnerProductLayer:
    '''
    Fully connected layer: top = bottom . weight + bias.

    param keys read by the constructor: 'output_size', 'weight_filler_param'
    and 'bias_filler_param' (the two filler dicts are deep-copied).
    blobs_ holds the learnable blobs as [weight, bias] once setup() has run.
    '''

    def __init__(self, param):
        self.output_size_ = param['output_size']
        self.weight_filler_param_ = copy.deepcopy(param['weight_filler_param'])
        self.bias_filler_param_ = copy.deepcopy(param['bias_filler_param'])
        # Sizes are only known once setup() sees the bottom blob.
        self.input_size_ = None
        self.batch_size_ = None
        self.blobs_ = [None, None]  # [weight, bias]

    def setup(self, bottoms, tops):
        '''
        Read (batch_size, input_size) from bottoms[0].data_, size tops[0] as
        (batch_size, output_size) and create the weight blob of shape
        (input_size, output_size) and the bias blob of shape (output_size,).
        '''
        assert len(bottoms) == 1, 'layer InnerProductLayer only support bottoms of length 1'
        assert len(tops) == 1, 'layer InnerProductLayer only support tops of length 1'
        self.batch_size_, self.input_size_ = bottoms[0].data_.shape
        tops[0].setup((self.batch_size_, self.output_size_))
        fill_plan = (
            (self.weight_filler_param_, (self.input_size_, self.output_size_)),
            (self.bias_filler_param_, (self.output_size_, )),
        )
        for slot, (filler_param, blob_shape) in enumerate(fill_plan):
            self.blobs_[slot] = Filler(filler_param).fill(blob_shape)

    def forward(self, bottoms, tops):
        '''Store bottom . weight + bias in tops[0].data_.'''
        weight = self.blobs_[0]
        bias = self.blobs_[1]
        tops[0].data_ = np.dot(bottoms[0].data_, weight.data_) + bias.data_

    def backward(self, bottoms, tops):
        '''
        From tops[0].diff_ compute the gradients with respect to the input
        (bottoms[0].diff_), the weight (blobs_[0].diff_) and the bias
        (blobs_[1].diff_, the top gradient summed over the batch axis).
        '''
        top_grad = tops[0].diff_
        weight = self.blobs_[0]
        bias = self.blobs_[1]
        bottoms[0].diff_ = np.dot(weight.data_, top_grad.T).T
        weight.diff_ = np.dot(bottoms[0].data_.T, top_grad)
        bias.diff_ = np.sum(top_grad, axis=0)

layer_register('InnerProduct', InnerProductLayer)

In [228]:
if 'SoftmaxLoss' in layer_creator:
    del layer_creator['SoftmaxLoss']
class SoftmaxLossLayer:
    '''
    Softmax followed by the mean negative log-likelihood loss.

    bottoms[0] of shape (batch_size, num_labels): unnormalized scores
    bottoms[1] of shape (batch_size,) or (batch_size, 1): ground truth label per entry
    Entries of bottoms[1] should be in range(0, num_labels).
    '''

    def __init__(self, param=None):
        # param is accepted for interface parity with the other layers; it is not used.
        self.prob_ = None   # softmax probabilities from the latest forward pass
        self.blobs_ = None  # no learnable parameters
        return

    def setup(self, bottoms, tops):
        assert len(bottoms) == 2, 'layer SoftmaxLossLayer only support bottoms of length 2'
        return

    def _target_index(self, bottoms):
        '''Return (row indices, integer label indices) selecting the true-class entry of every row.'''
        N = bottoms[0].data_.shape[0]
        # Blob arrays are float, and float values cannot be used as numpy indices,
        # so cast explicitly. Flattening also accepts (batch_size, 1) labels.
        labels = np.asarray(bottoms[1].data_).reshape(-1).astype(int)
        return np.arange(N), labels

    def forward(self, bottoms, tops):
        '''Compute prob_ and the loss; the loss is printed and also returned.'''
        scores = bottoms[0].data_
        # Subtract the row maximum before exponentiating for numerical stability.
        self.prob_ = np.exp(scores - np.max(scores, axis=1, keepdims=True))
        self.prob_ /= np.sum(self.prob_, axis=1, keepdims=True)
        N = scores.shape[0]
        rows, labels = self._target_index(bottoms)
        loss = -np.sum(np.log(self.prob_[rows, labels])) / N
        print(loss)
        return loss

    def backward(self, bottoms, tops):
        '''Write the gradient of the loss with respect to the scores, (prob - one_hot) / N, into bottoms[0].diff_.'''
        N = bottoms[0].data_.shape[0]
        rows, labels = self._target_index(bottoms)
        grad = self.prob_.copy()
        grad[rows, labels] -= 1
        grad /= N
        bottoms[0].diff_ = grad
        return

layer_register('SoftmaxLoss', SoftmaxLossLayer)

# bottoms = [None] * 2
# bottoms[0] = Blob()
# bottoms[0].data_ = np.random.randn(20, 40)
# bottoms[1] = Blob()
# bottoms[1].data_ = np.random.randint(40, size=20)
# tops = []

# sm_layer = SoftmaxLossLayer(None)
# sm_layer.forward(bottoms, tops)
# sm_layer.backward(bottoms, tops)
# plt.plot(bottoms[0].data_[0, :], sm_layer.prob_[0, :], 'x')

In [20]:
# if 'EuclideanLoss' in layer_creator:
#     del layer_creator['EuclideanLoss']
class EuclideanLossLayer:
    '''
    Half mean squared error: loss = 0.5 * sum((pred - truth) ** 2) / batch_size.

    bottoms[0] of shape (batch_size, num_values): predicted values
    bottoms[1] of shape (batch_size, num_values): ground truth values
    '''

    def __init__(self, param=None):
        # param is accepted for interface parity with the other layers; it is not used.
        self.blobs_ = None  # no learnable parameters
        return

    def setup(self, bottoms, tops):
        assert len(bottoms) == 2, 'layer EuclideanLossLayer only support bottoms of length 2'
        return

    def forward(self, bottoms, tops):
        '''Compute the loss; it is printed and also returned.'''
        N = bottoms[0].data_.shape[0]
        loss = 0.5 * np.sum((bottoms[0].data_ - bottoms[1].data_) ** 2) / N
        print(loss)
        return loss

    def backward(self, bottoms, tops):
        '''Write the gradient of the loss with respect to the predictions into bottoms[0].diff_.'''
        N = bottoms[0].data_.shape[0]
        # The loss is averaged over the batch, so its gradient carries the same 1/N factor.
        bottoms[0].diff_ = (bottoms[0].data_ - bottoms[1].data_) / N
        return

# layer_register('EuclideanLoss', EuclideanLossLayer)

# bottoms = [None] * 2
# bottoms[0] = Blob()
# bottoms[0].data_ = np.random.randn(20, 40)
# bottoms[1] = Blob()
# bottoms[1].data_ = np.random.randn(20, 40)
# tops = []

# param = None
# el_layer = EuclideanLossLayer(param)
# el_layer.forward(bottoms, tops)
# el_layer.backward(bottoms, tops)

41.0605740133


In [15]:
np.ones((10, 1)).reshape(-1)

array([ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.])

In [236]:
if 'Sigmoid' in layer_creator:
    del layer_creator['Sigmoid']

class SigmoidLayer:
    '''Element-wise logistic sigmoid activation; it has no learnable parameters.'''

    def __init__(self, param):
        self.blobs_ = None

    def setup(self, bottoms, tops):
        '''Give the single top blob the same shape as the single bottom blob.'''
        assert len(bottoms) == 1, 'layer SigmoidLayer only support bottoms of length 1'
        assert len(tops) == 1, 'layer SigmoidLayer only support tops of length 1'
        input_shape = bottoms[0].shape()
        tops[0].setup(input_shape)

    def forward(self, bottoms, tops):
        '''Store sigmoid(bottoms[0].data_) in tops[0].data_.'''
        negated = -bottoms[0].data_
        tops[0].data_ = 1.0 / (1.0 + np.exp(negated))

    def backward(self, bottoms, tops):
        '''
        Store tops[0].diff_ * y * (1 - y) in bottoms[0].diff_, where y is the
        output kept in tops[0].data_ by forward().
        '''
        output = tops[0].data_
        local_slope = (1.0 - output) * output
        bottoms[0].diff_ = tops[0].diff_ * local_slope
layer_register('Sigmoid', SigmoidLayer)
    
# bottoms = [None]
# bottoms[0] = Blob()
# bottoms[0].data_ = np.random.randn(20, 40)
# tops = [None]
# tops[0] = Blob()
# tops[0].diff_ = np.ones((20, 40))

# sig_param = {'name': 'sig'}
# sig_layer = SigmoidLayer(sig_param)
# sig_layer.forward(bottoms, tops)
# plt.plot(bottoms[0].data_.reshape(-1), tops[0].data_.reshape(-1), 'x')
# sig_layer.backward(bottoms, tops)
# plt.plot(bottoms[0].data_.reshape(-1), bottoms[0].diff_.reshape(-1), 'x')

In [292]:
try:
    NnplException
except NameError:
    # Net raises NnplException, but nothing visible in this notebook defines it.
    # Provide a fallback so a bad network description produces the intended
    # error instead of a NameError; an existing definition is left untouched.
    class NnplException(Exception):
        '''Raised when a network description is inconsistent.'''
        pass


class Net:
    '''
    Param should be of format [{'name': 'layer1', 'type': 'LayerType', 'bottoms': ['blob1', 'blob2'], 'tops': ['blob1', 'blob2'], ...layer specific keys}, ...]

    Blob names and layer names are used by Net to keep track of the blobs and
    layers, so that undefined blobs cannot be used and blobs cannot be
    defined twice.

    Each layer is constructed from its param dict; its bottoms and tops are
    then passed to setup() so it can initialize internal weights and biases.
    '''

    def __init__(self, params):
        '''
        Build every layer in order, wiring each bottom name to the blob created
        earlier under that name and creating a new Blob for each top name.

        Raises NnplException when a top name is already defined (in-place
        layers are not supported), and AssertionError when a bottom name has
        not been defined by an earlier layer.
        '''
        self.num_layers_ = len(params)
        self.layer_names_ = [layer['name'] for layer in params]
        self.layers_ = []

        self.blobs_ = []
        self.top_blobs_ = []      # per layer: list of its top blobs
        self.bottom_blobs_ = []   # per layer: list of its bottom blobs
        self.blob_name2idx_ = {}  # blob name -> index into blobs_
        self.blob_names_ = []

        self.learnable_params_ = []

        for param in params:
            # initialize layer
            layer = layer_creator[param['type']](param)
            self.layers_.append(layer)

            # look up bottom blobs
            bottoms = []
            for bottom_name in param['bottoms']:
                assert bottom_name in self.blob_name2idx_, 'blob %s has not been defined'%bottom_name
                bottoms.append(self.blobs_[self.blob_name2idx_[bottom_name]])
            self.bottom_blobs_.append(bottoms)

            # create top blobs
            tops = []
            for top_name in param['tops']:
                if top_name in self.blob_name2idx_:
                    # Compare against the bottom *names*; `bottoms` holds Blob
                    # objects, so testing the name against it can never match.
                    if top_name in param['bottoms']:
                        # inplace layer, to be implemented
                        raise NnplException('blob %s has already been defined. Not support inplace layer yet'%top_name)
                    raise NnplException('blob %s has already been defined.'%top_name)
                # blob top_name has not been defined yet
                self.blob_name2idx_[top_name] = len(self.blob_names_)
                self.blob_names_.append(top_name)
                top = Blob()
                tops.append(top)
                self.blobs_.append(top)
            self.top_blobs_.append(tops)

            # setup layer
            layer.setup(bottoms, tops)

            # collect the layer's learnable blobs
            if layer.blobs_ is not None:
                for blob in layer.blobs_:
                    if blob.is_update_:
                        self.learnable_params_.append(blob)
        return

    def forward(self):
        '''Run forward() on every layer, first to last.'''
        for layer, bottoms, tops in zip(self.layers_, self.bottom_blobs_, self.top_blobs_):
            layer.forward(bottoms, tops)
        return

    def backward(self):
        '''Run backward() on every layer, last to first.'''
        for i in reversed(range(self.num_layers_)):
            self.layers_[i].backward(self.bottom_blobs_[i], self.top_blobs_[i])
        return

    def forward_backward(self):
        '''One full forward pass followed by one full backward pass.'''
        self.forward()
        self.backward()
        return

    def update(self):
        '''Call update() on every blob owned by a layer; each blob decides itself whether to change.'''
        for layer in self.layers_:
            if layer.blobs_ is None:
                continue
            for blob in layer.blobs_:
                blob.update()

In [293]:
a = []
b = [1, 2]
c = [3, 4]
np.concatenate((a, b, c))

array([ 1.,  2.,  3.,  4.])

In [294]:
class Solver:
    '''
    Builds a Net from param['net'] and trains it.

    param keys read: 'net', 'type', 'lr_rate' and 'max_iter'.
    '''

    def __init__(self, param):
        self.net_ = Net(param['net'])
        # Remaining settings are stored under the same name with a trailing underscore.
        for key in ('type', 'lr_rate', 'max_iter'):
            setattr(self, key + '_', param[key])

    def solve(self):
        '''Run max_iter_ training iterations.'''
        self.step(self.max_iter_)

    def step(self, iters):
        '''
        Run iters training iterations. Each one does a forward/backward pass,
        scales the gradient of every learnable blob by lr_rate_ in place and
        then lets the net apply the update.
        '''
        for _ in range(iters):
            self.net_.forward_backward()
            for learnable in self.net_.learnable_params_:
                learnable.diff_ *= self.lr_rate_
            self.net_.update()

In [317]:
# Data layer: feeds features and labels read from the text files under ./iris,
# with a batch size equal to the declared number of entries.
txt_data_param = {
    'type': 'TxtData',
    'tops': ['features', 'labels'],
    'bottoms': [],
    'batch_size': 150,
    'feature_file': './iris/features.dat',
    'label_file': './iris/labels.dat',
    'num_entries': 150,
    'name': 'Data',
}

# First inner-product layer: features -> 100 units.
weight_filler_param1 = {'type': 'gaussian', 'std': 0.01}
bias_filler_param1 = {'type': 'constant'}
ip_param1 = {
    'type': 'InnerProduct',
    'tops': ['ip1'],
    'bottoms': ['features'],
    'output_size': 100,
    'name': 'ip1',
    'weight_filler_param': weight_filler_param1,
    'bias_filler_param': bias_filler_param1,
}

# Sigmoid activation on the output of ip1.
sig_param = {
    'type': 'Sigmoid',
    'name': 'sig',
    'bottoms': ['ip1'],
    'tops': ['sig'],
}

# Second inner-product layer: 100 units -> 3 scores.
weight_filler_param2 = {'type': 'gaussian', 'std': 0.01}
bias_filler_param2 = {'type': 'constant'}
ip_param2 = {
    'type': 'InnerProduct',
    'tops': ['ip2'],
    'bottoms': ['sig'],
    'output_size': 3,
    'name': 'ip2',
    'weight_filler_param': weight_filler_param2,
    'bias_filler_param': bias_filler_param2,
}

# Softmax loss comparing the ip2 scores with the labels; it produces no top blob.
sm_param = {
    'type': 'SoftmaxLoss',
    'name': 'softmax',
    'bottoms': ['ip2', 'labels'],
    'tops': [],
}

# Layers are listed in execution order.
net_param = [txt_data_param, ip_param1, sig_param, ip_param2, sm_param]

solver_param = {'net': net_param, 'lr_rate': 0.5, 'max_iter': 500, 'type': 'sgd'}

In [318]:
s = Solver(solver_param)

In [319]:
s.solve()

1.09920399189
1.10251673785
1.14750381477
1.54160233348
2.44239670706
1.95529456788
1.6454781027
1.27735486609
1.1375970113
1.10741357534
1.08901278448
1.08087637066
1.0725020127
1.06564983673
1.0581913356
1.05022905554
1.04109363148
1.03061340232
1.01837828349
1.00419398215
0.987777134588
0.969083353244
0.948089359026
0.925040803972
0.900195252518
0.873996604048
0.846826653748
0.819171971456
0.791417167972
0.764000654137
0.737237810701
0.711455610543
0.686850711593
0.663603020556
0.641777716949
0.621417286068
0.602481568939
0.584923514415
0.568647232425
0.5535776847
0.539615912569
0.526701006492
0.514769464356
0.503801589673
0.493837087434
0.485015554402
0.477734746784
0.472787123062
0.47177288354
0.477505205232
0.493526324544
0.522108033747
0.556192246642
0.577590769663
0.581015792816
0.55506164565
0.53920530449
0.506585151223
0.492711261562
0.469564005118
0.459734389319
0.444462626771
0.437857593898
0.427627178084
0.423494712614
0.416580642889
0.414520014645
0.409999015795
0.4097867

In [320]:
s.net_.blob_names_

['features', 'labels', 'ip1', 'sig', 'ip2']

In [321]:
gt = s.net_.blobs_[s.net_.blob_name2idx_['labels']].data_
# n.blobs_[n.blob_name2idx_['features']].diff_

In [322]:
pred = np.argmax(s.net_.blobs_[s.net_.blob_name2idx_['ip2']].data_, axis=1)

In [323]:
np.sum(pred == gt)

148

In [None]:
a = [np]

In [327]:
from nnpl import 