# 第6章 MaxEntropy
## By LiuGang - 2018/11/16
## Reference book: *Statistical Learning Methods* (in Chinese)

### 1:  Create some data

In [99]:
import numpy as np
# X = np.array([[1,2],[2,2],[3,1],[5,-0.5],[1,1],[2,0],[0.5,1],[-1,2]])
# y = np.array([1,1,1,1,0,0,0,0])

# Seed NumPy's global RNG so the synthetic dataset is identical on every run.
np.random.seed(5)

# 1000 samples with 20 integer features each, drawn from [1, 1000).
X = np.random.randint(low=1, high=1000, size=(1000, 20))

# Alternating labels: 1 for even row indices, 0 for odd ones.
y = (np.arange(1000) % 2 == 0).astype(int)

### 2: Class

In [103]:
class MaxEntropy():
    """Maximum-entropy classifier over ``(feature value, label)`` indicators.

    Every distinct pair ``(x_ij, y_i)`` seen in the training data becomes one
    indicator feature with its own weight. The conditional model is

        P(y | x) = exp(sum of weights of the pairs (xi, y) for xi in x) / Z(x)

    and ``train`` repeatedly applies ``w += M * log(E_empirical / E_model)``.

    Attributes:
        X: training samples, a sequence of feature sequences.
        y: training labels, indexable in step with ``X``.
        N: number of training samples.
        M: multiplier applied to each weight update.
        xy2id: maps a ``(feature value, label)`` pair to its weight index.
        id2xy: inverse of ``xy2id``.
        nn: number of distinct ``(feature value, label)`` pairs.
        w: 1-D weight array of length ``nn``.
    """

    def __init__(self, X, y, M):
        """Store the training data and index its features.

        Args:
            X: sequence of samples; each sample is a sequence of hashable
                feature values.
            y: sequence of labels, one per sample.
            M: multiplier for the weight update performed in ``train``.
        """
        self.X = X
        self.y = y
        self.N = len(self.X)
        self.M = M  # 0.001
        self.build_xy_num()

    def build_xy_num(self):
        """Assign an index to every distinct ``(feature value, label)`` pair.

        Also resets the weights to zero and caches the distinct labels.
        """
        self.xy2id = {}
        self.id2xy = {}
        for i in range(self.N):
            yi = self.y[i]
            # Read the instance's own data, not a module-level ``X``.
            for xi in self.X[i]:
                pair = (xi, yi)
                if pair not in self.xy2id:
                    idx = len(self.xy2id)
                    self.id2xy[idx] = pair
                    self.xy2id[pair] = idx
        self.nn = len(self.xy2id)
        # A plain 1-D array stays indexable even when nn == 1, where
        # squeezing a (1, 1) array would collapse it to 0-d.
        self.w = np.zeros(self.nn)
        # Distinct labels, computed once. Rebuilding set(self.y) for every
        # sample would make each pass over the data quadratic in N.
        self._labels = list(set(self.y))

    def get_pyix(self, x, yi):
        """Return the unnormalised score ``exp(sum of weights)`` of label ``yi``.

        Only pairs ``(xi, yi)`` that were seen during indexing contribute.
        """
        total = 0
        for xi in x:
            idx = self.xy2id.get((xi, yi))
            if idx is not None:
                total += self.w[idx]
        return np.exp(total)

    def get_pyx(self, x, y):
        """Return ``[(unnormalised score, label), ...]`` for each distinct label in ``y``."""
        return [(self.get_pyix(x, yi), yi) for yi in set(y)]

    def get_x_prob(self, x):
        """Return ``[(P(label | x), label), ...]`` over the training labels."""
        scored = [(self.get_pyix(x, yi), yi) for yi in self._labels]
        Z = sum(score for score, _ in scored)
        return [(score / Z, yi) for score, yi in scored]

    def get_Epx(self):
        """Return the expectation of each feature under the current model.

        For every training sample and every label, the model probability of
        that label (divided by ``N``) is added to each known pair
        ``(xi, label)`` of the sample.
        """
        Epx = np.zeros(self.nn)
        for x in self.X:
            for prob, yi in self.get_x_prob(x):
                for xi in x:
                    idx = self.xy2id.get((xi, yi))
                    if idx is not None:
                        Epx[idx] += prob / self.N
        return Epx

    def get_Epyx(self):
        """Return the empirical expectation of each feature.

        Each occurrence of a pair ``(xi, y_i)`` in the training data adds
        ``1 / N`` to that pair's entry.
        """
        Epyx = np.zeros(self.nn)
        for i, x in enumerate(self.X):
            yi = self.y[i]
            for xi in x:
                idx = self.xy2id.get((xi, yi))
                if idx is not None:
                    Epyx[idx] += 1 / self.N
        return Epyx

    def train(self, epoch=10,scope=1):
        """Run ``epoch`` weight updates, printing training accuracy periodically.

        Args:
            epoch: number of update iterations.
            scope: print the training accuracy every ``scope`` iterations.
        """
        # The empirical expectation does not depend on the weights.
        Epyx = self.get_Epyx()
        for i in range(epoch):
            Epx = self.get_Epx()
            self.w = self.w + self.M * np.log(Epyx / Epx)
            # Predicting on the whole training set is expensive, so only do
            # it on the iterations that actually report accuracy.
            if i % scope == 0:
                pred = np.array(self.predict(self.X))
                n_correct = int(np.sum(pred == np.asarray(self.y)))
                accuracy = n_correct / len(self.y)
                print('----training accuracy:'+str(accuracy)+'----')

    def predict(self, xdata):
        """Return the most probable label for each sample in ``xdata``.

        On a tie, the label that comes first in the cached label order wins.
        """
        return [
            max(self.get_x_prob(data), key=lambda scored: scored[0])[1]
            for data in xdata
        ]
    
    
            
    
    

In [104]:
# Fit the model on the synthetic data with an update multiplier of 0.01,
# reporting the training accuracy once every 10 iterations.
myME = MaxEntropy(X, y, M=0.01)
myME.train(scope=10, epoch=1000)

----training accuracy:0.862----
----training accuracy:0.868----
----training accuracy:0.874----
----training accuracy:0.889----
----training accuracy:0.899----
----training accuracy:0.911----
----training accuracy:0.917----
----training accuracy:0.928----
----training accuracy:0.932----
----training accuracy:0.936----
----training accuracy:0.938----
----training accuracy:0.943----
----training accuracy:0.945----
----training accuracy:0.948----
----training accuracy:0.953----
----training accuracy:0.956----
----training accuracy:0.962----
----training accuracy:0.967----
----training accuracy:0.969----
----training accuracy:0.973----
----training accuracy:0.974----
----training accuracy:0.975----
----training accuracy:0.976----
----training accuracy:0.976----
----training accuracy:0.979----
----training accuracy:0.98----
----training accuracy:0.98----
----training accuracy:0.981----
----training accuracy:0.982----
----training accuracy:0.983----
----training accuracy:0.983----
----traini

KeyboardInterrupt: 