In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict

class MaxEnt:
    def __init__(self, max_iter=100):
        # 训练输入
        self.X_ = None
        # 训练标签
        self.y_ = None
        # 标签类别数量
        self.m = None   
        # 特征数量
        self.n = None   
        # 训练样本量
        self.N = None   
        # 常数特征取值
        self.M = None
        # 权重系数
        self.w = None
        # 标签名称
        self.labels = defaultdict(int)
        # 特征名称
        self.features = defaultdict(int)
        # 最大迭代次数
        self.max_iter = max_iter

    ### 计算特征函数关于经验联合分布P(X,Y)的期望
    def _EP_hat_f(self, x, y):
        self.Pxy = np.zeros((self.m, self.n))
        self.Px = np.zeros(self.n)
        for x_, y_ in zip(x, y):
            # 遍历每个样本
            for x__ in set(x_):
                self.Pxy[self.labels[y_], self.features[x__]] += 1
                self.Px[self.features[x__]] += 1           
        self.EP_hat_f = self.Pxy/self.N
    
    ### 计算特征函数关于模型P(Y|X)与经验分布P(X)的期望
    def _EP_f(self):
        self.EPf = np.zeros((self.m, self.n))
        for X in self.X_:
            pw = self._pw(X)
            pw = pw.reshape(self.m, 1)
            px = self.Px.reshape(1, self.n)
            self.EP_f += pw*px / self.N
    
    ### 最大熵模型P(y|x)
    def _pw(self, x):
        mask = np.zeros(self.n+1)
        for ix in x:
            mask[self.features[ix]] = 1
        tmp = self.w * mask[1:]
        pw = np.exp(np.sum(tmp, axis=1))
        Z = np.sum(pw)
        pw = pw/Z
        return pw

    ### 熵模型拟合
    ### 基于改进的迭代尺度方法IIS
    def fit(self, x, y):
        # 训练输入
        self.X_ = x
        # 训练输出
        self.y_ = list(set(y))
        # 输入数据展平后集合
        tmp = set(self.X_.flatten())
        # 特征命名
        self.features = defaultdict(int, zip(tmp, range(1, len(tmp)+1)))   
        # 标签命名
        self.labels = dict(zip(self.y_, range(len(self.y_))))
        # 特征数
        self.n = len(self.features)+1  
        # 标签类别数量
        self.m = len(self.labels)
        # 训练样本量
        self.N = len(x)  
        # 计算EP_hat_f
        self._EP_hat_f(x, y)
        # 初始化系数矩阵
        self.w = np.zeros((self.m, self.n))
        # 循环迭代
        i = 0
        while i <= self.max_iter:
            # 计算EPf
            self._EP_f()
            # 令常数特征函数为M
            self.M = 100
            # IIS算法步骤(3)
            tmp = np.true_divide(self.EP_hat_f, self.EP_f)
            tmp[tmp == np.inf] = 0
            tmp = np.nan_to_num(tmp)
            sigma = np.where(tmp != 0, 1/self.M*np.log(tmp), 0)  
            # 更新系数:IIS步骤(4)
            self.w = self.w + sigma
            i += 1
        print('training done.')
        return self

    # 定义最大熵模型预测函数
    def predict(self, x):
        res = np.zeros(len(x), dtype=np.int64)
        for ix, x_ in enumerate(x):
            tmp = self._pw(x_)
            print(tmp, np.argmax(tmp), self.labels)
            res[ix] = self.labels[self.y_[np.argmax(tmp)]]
        return np.array([self.y_[ix] for ix in res])

In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
raw_data = load_iris()
X, labels = raw_data.data, raw_data.target
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.3, random_state=43)
print(X_train.shape, y_train.shape)

(105, 4) (105,)


In [3]:
labels[-5:]

array([2, 2, 2, 2, 2])

In [4]:
from sklearn.metrics import accuracy_score
maxent = MaxEnt()
maxent.fit(X_train, y_train)
y_pred = maxent.predict(X_test)
print(accuracy_score(y_test, y_pred))



training done.
[0.87116843 0.04683368 0.08199789] 0 {0: 0, 1: 1, 2: 2}
[0.00261138 0.49573305 0.50165557] 2 {0: 0, 1: 1, 2: 2}
[0.12626693 0.017157   0.85657607] 2 {0: 0, 1: 1, 2: 2}
[1.55221378e-04 4.45985560e-05 9.99800180e-01] 2 {0: 0, 1: 1, 2: 2}
[7.29970746e-03 9.92687370e-01 1.29226740e-05] 1 {0: 0, 1: 1, 2: 2}
[0.01343943 0.01247887 0.9740817 ] 2 {0: 0, 1: 1, 2: 2}
[0.85166079 0.05241898 0.09592023] 0 {0: 0, 1: 1, 2: 2}
[0.00371481 0.00896982 0.98731537] 2 {0: 0, 1: 1, 2: 2}
[2.69340079e-04 9.78392776e-01 2.13378835e-02] 1 {0: 0, 1: 1, 2: 2}
[0.01224702 0.02294254 0.96481044] 2 {0: 0, 1: 1, 2: 2}
[0.00323508 0.98724246 0.00952246] 1 {0: 0, 1: 1, 2: 2}
[0.00196548 0.01681989 0.98121463] 2 {0: 0, 1: 1, 2: 2}
[0.00480966 0.00345107 0.99173927] 2 {0: 0, 1: 1, 2: 2}
[0.00221101 0.01888735 0.97890163] 2 {0: 0, 1: 1, 2: 2}
[9.87528545e-01 3.25313387e-04 1.21461416e-02] 0 {0: 0, 1: 1, 2: 2}
[3.84153917e-05 5.25603786e-01 4.74357798e-01] 1 {0: 0, 1: 1, 2: 2}
[0.91969448 0.00730851 0.0729