In [58]:
import numpy as np
from typing import List

In [59]:
# State space of the Markov reward process: 7 states in total.
num_states = 7
# Index (as a string key) -> state name, listed in the same order as the matrix rows.
i_to_n = {
    str(idx): name
    for idx, name in enumerate(["C1", "C2", "C3", "Pass", "Pub", "FB", "Sleep"])
}
# Reverse lookup: state name -> integer index.
n_to_i = {name: int(idx) for idx, name in i_to_n.items()}

# State-transition probability matrix, built directly as a NumPy array.
# Row = current state, column = next state; every row sums to 1.
# Row/column order: C1, C2, C3, Pass, Pub, FB, Sleep.
Pss = np.array([
    [0.0, 0.5, 0.0, 0.0, 0.0, 0.5, 0.0],  # C1
    [0.0, 0.0, 0.8, 0.0, 0.0, 0.0, 0.2],  # C2
    [0.0, 0.0, 0.0, 0.6, 0.4, 0.0, 0.0],  # C3
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],  # Pass
    [0.2, 0.4, 0.4, 0.0, 0.0, 0.0, 0.0],  # Pub
    [0.1, 0.0, 0.0, 0.0, 0.0, 0.9, 0.0],  # FB
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],  # Sleep
])

# Immediate reward of each of the 7 states, in the same state order.
rewards = [-2, -2, -2, 10, 1, -1, 0]
# Discount factor.
gamma = 0.5

In [60]:
def compute_return(start_index=0,
                   chain=None,
                   gamma=0.5) -> float:
    """
    Compute the discounted return of one state within a sampled chain.

    The return is sum_k gamma**k * R(chain[start_index + k]), accumulated from
    ``start_index`` to the end of the chain. Rewards are looked up through the
    notebook-level ``n_to_i`` (state name -> index) and ``rewards`` tables.

    :param start_index: position in ``chain`` of the state whose return is computed
    :param chain: sequence of state names; ``None`` is treated as an empty chain
    :param gamma: discount factor applied once per step
    :return: the discounted return (0.0 when there is nothing to accumulate)
    :raises KeyError: if a state name in ``chain`` is not a key of ``n_to_i``
    """
    if chain is None:
        # The default argument means "no chain": nothing to accumulate.
        return 0.0
    total = 0.0
    # The caller-supplied gamma is used as-is (it must not be overwritten here).
    # The exponent counts steps from start_index, so the first reward is undiscounted.
    for step, position in enumerate(range(start_index, len(chain))):
        total += gamma ** step * rewards[n_to_i[chain[position]]]
    return total

In [61]:
# A few sample chains (episodes) of the process; compute the return of one of them.
chains = [
    ["C1", "C2", "C3", "Pass", "Sleep"],
    ["C1", "FB", "FB", "C1", "C2", "Sleep"],
    ["C1", "C2", "C3", "Pub", "C2", "C3", "Pass", "Sleep"],
    [
        "C1", "FB", "FB", "C1", "C2", "C3", "Pub", "C1", "FB",
        "FB", "FB", "C1", "C2", "C3", "Pub", "C2", "Sleep",
    ],
]
# Return of the first state (index 0) of the last, longest chain.
compute_return(start_index=0, chain=chains[3], gamma=0.5)

-3.196044921875

In [62]:
def compute_value(Pss, rewards, gamma=0.05):
    """
    Compute state values directly by solving the Bellman equation in matrix form.

    Solves the linear system (I - gamma * Pss) v = rewards for v. The identity
    matrix is sized from ``Pss``, so any number of states is supported.

    :param Pss: square state-transition probability matrix, shape (n, n)
    :param rewards: immediate reward of each state, n entries
    :param gamma: discount factor
    :return: state values as a column vector, shape (n, 1)
    :raises ValueError: if ``Pss`` is not a square 2-D matrix or ``rewards``
        does not hold exactly one entry per state
    :raises numpy.linalg.LinAlgError: if (I - gamma * Pss) is singular
    """
    transition = np.asarray(Pss, dtype=float)
    if transition.ndim != 2 or transition.shape[0] != transition.shape[1]:
        raise ValueError("Pss must be a square 2-D matrix")
    n_states = transition.shape[0]

    # Rewards as a column vector so the result keeps the (n, 1) shape.
    reward_column = np.asarray(rewards, dtype=float).reshape((-1, 1))
    if reward_column.shape[0] != n_states:
        raise ValueError("rewards must contain one entry per state")

    # Solving the system avoids forming the explicit inverse, which is less
    # accurate when gamma is close to 1 and the matrix is nearly singular.
    return np.linalg.solve(np.eye(n_states) - gamma * transition, reward_column)

# State values of all states for a discount factor just below 1.
compute_value(Pss=Pss, rewards=rewards, gamma=0.99999)

array([[-12.54073351],
       [  1.45690179],
       [  4.32117045],
       [ 10.        ],
       [  0.80308417],
       [-22.53857963],
       [  0.        ]])