In [2]:
import numpy as np
from spm.spm_softmax import spm_softmax


# Build a small two-state model and evaluate the state prediction error
# for a single update step.

# Likelihood matrix.
A = np.array([
    [0.8, 0.4],
    [0.2, 0.6],
])

# Transition prior associated with the previous time step.
B_t1 = np.array([
    [0.9, 0.2],
    [0.1, 0.8],
])

# Transition prior associated with the current time step.
B_t2 = np.array([
    [0.2, 0.3],
    [0.8, 0.7],
])

# Observation.
o = np.array([1, 0])

# Prior distributions over states at tau, tau - 1 and tau + 1.
s_pi_tau = np.array([0.5, 0.5])
s_pi_tau_minus_1 = np.array([0.5, 0.5])
s_pi_tau_plus_1 = np.array([0.5, 0.5])

# Depolarization term (initial value): log of the prior at tau.
v_0 = np.log(s_pi_tau)

# Transpose B_t2, then pass it through spm_softmax.
# NOTE(review): spm_softmax is a project helper; presumably it normalizes
# each column of its input -- confirm against its implementation.
B_t2_cross_intermediate = B_t2.T
B_t2_cross = spm_softmax(B_t2_cross_intermediate)

# State prediction error (single iteration), assembled from its log terms.
log_term_prev = np.log(B_t1 @ s_pi_tau_minus_1)        # uses beliefs at tau - 1
log_term_next = np.log(B_t2_cross @ s_pi_tau_plus_1)   # uses beliefs at tau + 1
log_term_obs = np.log(A.T @ o)                         # uses the observation
state_error = 0.5 * (log_term_prev + log_term_next) + log_term_obs - np.log(s_pi_tau)

# Depolarization: initial value plus the prediction error.
v = v_0 + state_error

# Updated distribution over states: exponentiate v and normalize to sum to 1.
exp_v = np.exp(v)
s = exp_v / np.sum(exp_v)

# Report each quantity, preceded by a spacer line; close with one more spacer.
for heading, value in (
    ('Prior Distribution over States:', s_pi_tau),
    ('State Prediction Error:', state_error),
    ('Depolarization:', v),
    ('Posterior Distribution over States:', s),
):
    print(' ')
    print(heading)
    print(value)
print(' ')

 
Prior Distribution over States:
[0.5 0.5]
 
State Prediction Error:
[-0.17548846 -0.96897099]
 
Depolarization:
[-0.86863564 -1.66211817]
 
Posterior Distribution over States:
[0.68857861 0.31142139]
 


In [3]:
# Set up a model for computing outcome prediction errors.
# This minimizes expected free energy (maximizing reward and information gain).
#
# This cell computes the risk term (reward seeking) under two policies.

# Likelihood matrix.
A = np.array([
    [0.9, 0.1],
    [0.1, 0.9],
])

# State distributions under policy 1 and policy 2.
S1 = np.array([0.9, 0.1])
S2 = np.array([0.5, 0.5])

# Preferred outcomes.
C = np.array([1, 0])

# Predicted outcomes under each policy.
o_1 = A @ S1
o_2 = A @ S2

# Small constant added to the preference distribution to avoid log(0).
z = np.exp(-16)

# Log preferences are shared by both policies, so evaluate them once.
log_preferences = np.log(C + z)

# Risk: predicted outcomes weighted by the gap between their own log and
# the log preferences.
risk_1 = np.dot(o_1, np.log(o_1) - log_preferences)
risk_2 = np.dot(o_2, np.log(o_2) - log_preferences)

# One print per policy: spacer, heading, value -- then a trailing spacer.
print(' ', 'Risk Under Policy 1:', risk_1, sep='\n')
print(' ', 'Risk Under Policy 2:', risk_2, sep='\n')
print(' ')

 
Risk Under Policy 1:
2.408606420911068
 
Risk Under Policy 2:
7.30685276317247
 


In [4]:
# Compute the ambiguity term (information seeking) under two policies.

# Likelihood matrix.
A = np.array([
    [0.4, 0.2],
    [0.6, 0.8],
])

# State distributions under policy 1 and policy 2.
s1 = np.array([0.9, 0.1])
s2 = np.array([0.1, 0.9])

# Entry j of this vector is sum_i A[i, j] * log(A[i, j]), i.e. the negative
# entropy of column j of A. It does not depend on the policy, so it is
# evaluated once and reused.
neg_column_entropy = np.diag(A.T @ np.log(A))

# Ambiguity: negated dot product of that vector with the state distribution.
ambiguity_1 = -np.dot(neg_column_entropy, s1)
ambiguity_2 = -np.dot(neg_column_entropy, s2)

# One print per policy: spacer, heading, value -- then a trailing spacer.
print(' ', 'Ambiguity Under Policy 1:', ambiguity_1, sep='\n')
print(' ', 'Ambiguity Under Policy 2:', ambiguity_2, sep='\n')
print(' ')

 
Ambiguity Under Policy 1:
0.6557507426621495
 
Ambiguity Under Policy 2:
0.5176633478852948
 
