In [16]:
import numpy as np
from IPython import get_ipython
get_ipython().run_line_magic('matplotlib', 'inline')

In [17]:
def isolet_generator(paths=('/Users/Irinaluu/Desktop/AML/project/isolet_data.data',
                            '/Users/Irinaluu/Desktop/AML/project/isolet5.data'),
                     seed=None):
    """Yield shuffled ``(features, label)`` pairs read from comma-separated files.

    Every file is parsed as a numeric table whose last column is the class
    label and whose remaining columns are the feature vector. The tables are
    stacked in the order given, shuffled once, and then yielded row by row.

    Parameters
    ----------
    paths : sequence of str, optional
        Files to load and stack. Defaults to the two locations that were
        originally hard-coded in this notebook.
    seed : int or None, optional
        When given, the shuffle uses a private ``np.random.RandomState(seed)``
        and is therefore reproducible. When ``None`` (default) the global
        NumPy random state is used, exactly as before.

    Yields
    ------
    (numpy.ndarray, numpy.integer)
        One row of features and its label cast to ``int``.
    """
    # load_data
    # Parsing as float (instead of dtype=None) keeps the result a plain 2-D
    # array even when some column happens to contain only integer-looking
    # values; dtype=None would produce a structured array in that case and
    # the column slicing below would fail.
    tables = [np.genfromtxt(path, dtype=float, delimiter=',') for path in paths]
    data = np.vstack(tables)
    del tables
    data_size = data.shape[0]

    # permutation
    rng = np.random if seed is None else np.random.RandomState(seed)
    shuffle = rng.permutation(data_size)

    # Fancy indexing copies, so X and Y do not keep `data` alive.
    X = data[:, :-1][shuffle]
    Y = data[:, -1].astype(int)[shuffle]
    del shuffle, data

    for i in range(data_size):
        yield (X[i], Y[i])
    

In [27]:
def mnist_generator():
    """Yield shuffled, per-image normalised ``(pixels, label)`` pairs from MNIST.

    The dataset is fetched through scikit-learn. ``fetch_mldata`` is used when
    the installed scikit-learn still provides it; otherwise the function falls
    back to ``fetch_openml``, converting the labels to floats so both sources
    produce numeric labels.

    Each image (one row of the data matrix) is scaled by its own standard
    deviation and then shifted to zero mean. Samples are yielded as flat
    vectors; reshape at the call site if a 2-D image is needed.

    Yields
    ------
    (numpy.ndarray, float)
        One normalised image row and its label.
    """
    # load mnist
    try:
        from sklearn.datasets import fetch_mldata
    except ImportError:
        # fetch_mldata is no longer shipped by recent scikit-learn releases.
        from sklearn.datasets import fetch_openml
        dataset = fetch_openml('mnist_784', version=1, as_frame=False,
                               data_home='dataset')
        data = np.asarray(dataset.data, dtype=float)
        # OpenML delivers the digit labels as strings.
        label = np.asarray(dataset.target).astype(float)
    else:
        dataset = fetch_mldata("MNIST original", data_home = 'dataset/mnist-original.mat')
        data = dataset.data
        label = dataset.target
    del dataset
    data_size = label.shape[0]

    # permutation
    shuffle = np.random.permutation(data_size)
    data = data[shuffle]
    label = label[shuffle]
    del shuffle

    # normalize (row-wise: divide by std, then remove the mean of the result)
    std = np.std(data, axis=1, keepdims=True)
    # A constant row has std 0; leave it unscaled rather than producing NaN/inf.
    std[std == 0] = 1
    data = data / std
    data = data - np.mean(data, axis=1, keepdims=True)

    for i in range(data_size):
        yield (data[i], label[i])


In [28]:
class OnlineLogistRegression(object):
    """Binary logistic regression updated one sample at a time.

    The weight vector ``w`` has ``n_features + 1`` entries; index 0 is the
    bias, which is applied by prepending a constant 1 to every input vector.

    Parameters
    ----------
    lr : float, optional
        Step size of the gradient update performed by :meth:`train`.
    n_features : int, optional
        Length of the input vectors. Defaults to 617, the size that was
        previously hard-coded.
    """

    def __init__(self, lr=0.003, n_features=617):
        # Small random weights drawn uniformly from [-0.005, 0.005).
        self.w = .01 * (np.random.rand(n_features + 1) - 0.5)
        self.lr = lr

    @staticmethod
    def _with_bias(x):
        """Return the 1-D vector ``x`` with a leading constant 1 (bias input)."""
        return np.concatenate((np.array([1]), x), axis=0)

    def test(self, x):
        """Return the sigmoid of the linear score of the 1-D sample ``x``."""
        y_hat = self._with_bias(x).dot(self.w)

        return 1 / (1 + np.exp(-y_hat))

    def train(self, x, c):
        """Take one gradient step on sample ``x`` towards target ``c``.

        The step is ``w <- w - lr * (sigmoid(w . [1, x]) - c) * [1, x]``.
        """
        A = self._with_bias(x)
        c_hat = 1 / (1 + np.exp(-A.dot(self.w)))

        # Gradient of the log-loss for a single sample.
        fgrad = (c_hat - c) * A
        self.w = self.w - self.lr * fgrad

In [29]:
class Node(object):
    """A tree node holding a binary model and running sums of its outputs.

    Besides the links to its neighbours, a node keeps

    * ``n_all`` / ``e_all``: how many values were recorded and their sum,
    * ``n`` / ``e``: the same two quantities per registered class, aligned
      with ``class_list`` by position.

    The averages derived from these sums drive the left/right decision
    (0 means left, 1 means right).
    """

    def __init__(self, node_index):
        self.node_index = node_index

        # Links to neighbouring nodes; filled in through the setters below.
        self.left = None
        self.right = None
        self.parent = None

        # Statistics over every value passed to updateExpectation.
        self.n_all = 0
        self.e_all = 0

        # Per-class statistics, index-aligned with class_list.
        self.class_list = []
        self.n = []
        self.e = []

        self.model = OnlineLogistRegression()

    def setLeft(self, node):
        """Attach ``node`` as the left child."""
        self.left = node

    def setRight(self, node):
        """Attach ``node`` as the right child."""
        self.right = node

    def setParent(self, node):
        """Record ``node`` as the parent."""
        self.parent = node

    def testModel(self, x):
        """Return the model's output for sample ``x``."""
        return self.model.test(x)

    def trainModel(self, x, c):
        """Update the model with sample ``x`` and target ``c``."""
        self.model.train(x, c)

    def addClass(self, class_name):
        """Register a class with zeroed per-class statistics."""
        self.class_list.append(class_name)
        self.n.append(0)
        self.e.append(0)

    def updateExpectation(self, c, class_index_in_node):
        """Add the value ``c`` to the overall sums and to one class's sums."""
        slot = class_index_in_node
        self.n_all += 1
        self.e_all += c
        self.n[slot] += 1
        self.e[slot] += c

    def findExpectationAll(self):
        """Return the mean of all recorded values, or 0 if none were recorded."""
        if self.n_all != 0:
            return self.e_all / self.n_all
        return 0

    def findExpectationOneClass(self, class_index_in_node):
        """Return the mean recorded for one class, or 0 if it has no records."""
        count = self.n[class_index_in_node]
        if count != 0:
            return self.e[class_index_in_node] / count
        return 0

    def judgeInTrain(self, class_index_in_node):
        """Return 1 (right) if the class mean is at least the overall mean, else 0 (left)."""
        overall = self.findExpectationAll()
        per_class = self.findExpectationOneClass(class_index_in_node)
        return 1 if overall <= per_class else 0

    def judgeInTest(self, x):
        """Return 1 (right) if the model output for ``x`` is at least the overall mean, else 0 (left)."""
        overall = self.findExpectationAll()
        score = self.testModel(x)
        return 1 if overall <= score else 0

In [30]:
class Tree(object):
    """Binary tree of ``Node`` objects grown and trained one sample at a time.

    ``nodes`` lists every node in creation order (a node's ``node_index`` is
    its position in that list), ``size`` is the number of nodes created so
    far and ``root`` is the first node.
    """

    def __init__(self):
        self.nodes = []
        self.size = 0
        self.root = self._new_node()

    def _new_node(self, parent=None):
        """Create a node, append it to ``self.nodes`` and return it."""
        node = Node(self.size)
        node.setParent(parent)
        self.nodes.append(node)
        self.size = self.size + 1
        return node

    def giveBirth(self, node_index):
        """Give ``self.nodes[node_index]`` a left and a right child.

        The two new nodes are appended at the end of ``self.nodes``, left
        child first.
        """
        parent = self.nodes[node_index]
        parent.setLeft(self._new_node(parent))
        parent.setRight(self._new_node(parent))

    def onlineTrain(self, xy, node):
        """Train on one ``(x, y)`` sample from ``node`` down to a leaf.

        At every visited node the sample's class is registered if new, the
        node decides a direction ``c`` (0: left, 1: right) from its running
        averages, its model is trained towards ``c``, and the model's new
        output is added to the node's statistics. When a node receives its
        second class it is split, and the class it already held is
        registered on the child the current sample does *not* go to.
        """
        x, y = xy

        while node is not None:
            # 1: register if y is new in this node
            if_register = (y not in node.class_list)
            if if_register:
                node.addClass(y)

            # 2: judge (c == 0: left, c == 1: right)
            class_index_in_node = node.class_list.index(y)
            c = node.judgeInTrain(class_index_in_node)

            # 3: train
            node.trainModel(x, c)

            # 4: update e, n with the output of the freshly trained model
            c_hat = node.testModel(x)
            node.updateExpectation(c_hat, class_index_in_node)

            # 5: give birth if second class arrives at this node
            if if_register and len(node.class_list) == 2:
                self.giveBirth(node.node_index)
                # The only previous class is assigned to the other child,
                # though it cannot be trained there in an online algorithm.
                (node.left, node.right)[1 - c].addClass(node.class_list[0])

            # 6: continue down until a node without children has been trained
            node = (node.left, node.right)[c] if node.left is not None else None

    def startOnlineTrain(self, xy):
        """Train on one ``(x, y)`` sample starting at the root."""
        self.onlineTrain(xy, self.root)

    def onlineTest(self, x, node):
        """Return the class predicted for ``x``, starting the descent at ``node``.

        The descent follows each node's ``judgeInTest`` decision and stops at
        the first node that holds exactly one class.

        Raises
        ------
        ValueError
            If the descent reaches a node that has no children and does not
            hold exactly one class (for example a tree that was never
            trained).
        """
        while len(node.class_list) != 1:
            if node.left is None or node.right is None:
                raise ValueError(
                    'cannot predict: reached a node without children that '
                    'holds %d classes (has the tree been trained?)'
                    % len(node.class_list))
            node = (node.left, node.right)[node.judgeInTest(x)]
        return node.class_list[0]

    def startOnlineTest(self, x):
        """Return the class predicted for ``x``, starting at the root."""
        return self.onlineTest(x, self.root)

In [31]:
if __name__ == "__main__":

    # Build: the sample stream is lazy, so nothing is loaded until the
    # first next() call below.
    my_generator = isolet_generator()
    my_tree = Tree()

    # Train on the first 6797 samples of the stream, one at a time.
    for i in range(6797):
        my_tree.startOnlineTrain(next(my_generator))

    # Test on the following 1000 samples; record 1 for a hit, 0 for a miss.
    # NOTE(review): assumes the stream holds at least 6797 + 1000 samples,
    # otherwise next() raises StopIteration -- confirm against the data files.
    test_result = []
    for i in range(1000):
        x, y = next(my_generator)
        predicted = my_tree.startOnlineTest(x)
        test_result.append(1 if y == predicted else 0)

    accurancy = np.mean(test_result)
    print('accurancy =', accurancy)

accurancy = 0.835


In [8]:
def swap(self, node_index):
    """Unlink the parent of ``self.nodes[node_index]`` by promoting its sibling.

    The sibling of the selected node takes the parent's place as a child of
    the grandparent, and its ``parent`` link is updated to match. After the
    call neither the selected node nor its former parent is reachable from
    the grandparent.

    Parameters
    ----------
    node_index : int
        Position in ``self.nodes`` of the node whose parent is removed.

    Raises
    ------
    ValueError
        If the selected node has no parent or no grandparent, since there is
        then nothing to relink.
    """
    node = self.nodes[node_index]
    pa = node.parent
    if pa is None or pa.parent is None:
        raise ValueError('swap needs a node with both a parent and a grandparent')
    gpa = pa.parent

    sib = pa.right if pa.left is node else pa.left
    if pa is gpa.left:
        gpa.left = sib
    else:
        gpa.right = sib
    # Keep the upward link consistent with the new position of the sibling.
    sib.parent = gpa

    # NOTE(review): updateC is not defined anywhere in the visible code; the
    # call is kept in its original free-function form -- confirm whether it
    # is meant to be a method on the same object as swap.
    updateC(sib)
def updateC(self, )

SyntaxError: invalid syntax (<ipython-input-8-1e467de47dc6>, line 11)