# 第11章 CRF
## By LiuGang - 2018/11/23
## Reference book: *Statistical Learning Methods* (in Chinese)

## No. 1 - The prediction problem<br>1: Create some data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy
%matplotlib inline

### 2: Class

In [1]:
import numpy as np
 
class CRF(object):
    """Viterbi decoder for the prediction problem of a linear-chain CRF.

    The model is given in tabular form: for every position ``i`` and state
    ``y`` a state-feature value ``V[i, y]`` with weight ``VW[i, y]``, and for
    every transition ``i-1 -> i`` an edge-feature value ``E[i-1][l, y]`` with
    weight ``EW[i-1][l, y]`` (``l`` is the previous state, ``y`` the current
    one).  Scores are unnormalised and combined additively.
    """

    def __init__(self, V, VW, E, EW):
        """Store the feature and weight tables.

        :param V: state (node) feature values, indexed ``[position, state]``
        :param VW: weights matching ``V`` element by element
        :param E: transition (edge) feature values; ``E[i]`` is indexed
            ``[previous_state, current_state]`` for the step ``i -> i+1``
        :param EW: weights matching ``E`` element by element
        """
        self.V = V    # node feature table
        self.VW = VW  # node weight table
        self.E = E    # edge feature table
        self.EW = EW  # edge weight table
        self.D = []   # delta table: best unnormalised score of a path ending in each state
        self.P = []   # psi table: best predecessor state index for each (position, state)
        self.BP = []  # best path, filled in by Viterbi()

    def Viterbi(self):
        """Run Viterbi decoding and return the highest-scoring state path.

        Works for any number of states (the tables are no longer assumed to
        have exactly four columns) and accepts nested lists as well as NumPy
        arrays.  Ties are broken in favour of the lowest state index.

        :return: 1-D float array of length ``len(V)`` holding the best state
            for every position, as 1-based labels (index 0 means state 1).
            The same array is stored in ``self.BP``; ``self.D`` and
            ``self.P`` hold the delta and psi tables afterwards.
        :raises ValueError: if ``V`` is non-empty but is not a 2-D table with
            at least one state column.
        """
        V = np.asarray(self.V)
        VW = np.asarray(self.VW)
        n_steps = np.shape(V)[0] if V.ndim else 0

        # Tables are kept as float arrays, as before, so callers inspecting
        # D / P / BP see the same dtypes.
        self.D = np.full(shape=np.shape(V), fill_value=.0)
        self.P = np.full(shape=np.shape(V), fill_value=.0)
        self.BP = np.full(shape=(n_steps,), fill_value=0.0)
        if V.size == 0 and V.ndim < 2:
            # Empty sequence: nothing to decode.
            return self.BP
        if V.ndim != 2 or V.shape[1] == 0:
            raise ValueError(
                "V must be a 2-D (positions x states) table with at least one state"
            )

        n_states = V.shape[1]
        state_columns = np.arange(n_states)
        node_scores = V * VW  # weighted state features, one row per position

        # Initialisation: no predecessor at position 0, so psi stays 0.
        self.D[0] = node_scores[0]

        # Recursion: for every current state pick the best previous state.
        for i in range(1, n_steps):
            edge_scores = np.asarray(self.E[i - 1]) * np.asarray(self.EW[i - 1])
            # candidates[l, y] = best score ending in l + transition l->y + node y.
            # The grouping (D + edge) + node mirrors the scalar formula so the
            # floating-point results are unchanged.
            candidates = (self.D[i - 1][:, np.newaxis] + edge_scores) + node_scores[i]
            best_prev = np.argmax(candidates, axis=0)  # first maximum wins
            self.P[i] = best_prev
            self.D[i] = candidates[best_prev, state_columns]

        # Termination and backtracking: start from the best final state and
        # follow the stored predecessors back to position 0.
        self.BP[-1] = np.argmax(self.D[-1])
        for t in range(n_steps - 2, -1, -1):
            self.BP[t] = self.P[t + 1, int(self.BP[t + 1])]

        # The path holds 0-based state indices; shift by one so that the
        # result uses the 1-based state labels of the worked example.
        self.BP += 1
        return self.BP


最优状态路径为: [1. 4. 2. 4. 3. 2.]


In [11]:
# Worked example for the observation sequence X = '我 不 是 外 国 人'
# ("I am not a foreigner"): one row per character, one column per state.
S = np.array([
    [1, 0, 0, 1],
    [1, 0, 0, 1],
    [1, 1, 0, 0],
    [0, 1, 0, 1],
    [0, 1, 1, 1],
    [1, 1, 0, 0],
])
# Weights of the state features above.
SW = np.array([
    [1, 0, 0, 1],
    [1, 0, 0, 1],
    [1, 1, 0, 0],
    [0, 1, 0, 1],
    [1, 1, 0, 0],
    [1, 1, 0, 0],
])

# Every transition between neighbouring positions uses the same 4x4 table
# (rows: previous state, columns: current state), so it is written once and
# repeated for each of the len(S) - 1 transitions.
E = np.array([[[0, 0, 0, 1],
               [1, 0, 0, 1],
               [0, 1, 1, 0],
               [0, 1, 1, 0]]] * (len(S) - 1))
# Weights of the transition features above, repeated the same way.
EW = np.array([[[0.3, 0, 0, 0.7],
                [0.4, 0, 0, 0.6],
                [0, 0.7, 0.3, 0],
                [0, 0.6, 0.4, 0]]] * (len(S) - 1))

crf = CRF(S, SW, E, EW)
ret = crf.Viterbi()
print('最优状态路径为:', ret)
# State labels: 1-S, 2-E, 3-M, 4-B
# (presumably the Single / End / Middle / Begin word-segmentation tags - confirm)

最优状态路径为: [1. 4. 2. 4. 2. 1.]
