In [1]:
import sympy

from sympy import symbols

sympy.init_printing()

In [2]:
alpha, beta, gamma = symbols('alpha beta gamma')
v_hungry, v_full = symbols('v_hungry v_full')
q_hungry_eat, q_hungry_none, q_full_eat, q_full_none = symbols('q_hungry_eat q_hungry_none q_full_eat q_full_none')

In [3]:
# 求解示例Bellman最优方程

xy_tuples = ((0, 0), (1, 0), (0, 1), (1, 1))
for x, y in xy_tuples:
    system = sympy.Matrix((
        (1, 0, x - 1, -x, 0, 0, 0),
        (0, 1, 0, 0, -y, y - 1, 0),
        (-gamma, 0, 1, 0, 0, 0, -2),
        ((alpha - 1) * gamma, -alpha * gamma, 0, 1, 0, 0, 4 * alpha - 3),
        (-beta * gamma, (beta - 1) * gamma, 0, 0, 1, 0, -4 * beta + 2),
        (0, -gamma, 0, 0, 0, 1, 1)
    ))
    
    result = sympy.solve_linear_system(system, v_hungry, v_full, q_hungry_eat, q_hungry_none, q_full_eat, q_full_none)
    msgx = 'v(饿) = q(饿, {}吃)'.format('' if x else '不')
    msgy = 'v(饱) = q(饱, {}吃)'.format('不' if y else '')
    print('==== {}, {} ==== x = {}, y = {} ===='.format(msgx, msgy, x, y))
    display(result)

==== v(饿) = q(饿, 不吃), v(饱) = q(饱, 吃) ==== x = 0, y = 0 ====


⎧            -β⋅γ + 4⋅β + γ - 2                -1                    2        
⎨q_full_eat: ──────────────────, q_full_none: ─────, q_hungry_eat: ─────, q_hu
⎩                  γ - 1                      γ - 1                γ - 1      

           α⋅γ - 4⋅α - γ + 3           -1                2  ⎫
ngry_none: ─────────────────, v_full: ─────, v_hungry: ─────⎬
                 γ - 1                γ - 1            γ - 1⎭

==== v(饿) = q(饿, 吃), v(饱) = q(饱, 吃) ==== x = 1, y = 0 ====


⎧               2                          2                                  
⎪            α⋅γ  - 2⋅α⋅γ - 4⋅β⋅γ + 4⋅β - γ  + 3⋅γ - 2                -1      
⎨q_full_eat: ─────────────────────────────────────────, q_full_none: ─────, q_
⎪                       2          2                                 γ - 1    
⎩                    α⋅γ  - α⋅γ - γ  + 2⋅γ - 1                                

               2            2                                                 
            α⋅γ  - 2⋅α⋅γ - γ  - γ + 2                    -α⋅γ + (4⋅α - 3)⋅(γ -
hungry_eat: ─────────────────────────, q_hungry_none: ────────────────────────
               2          2                            2                      
            α⋅γ  - α⋅γ - γ  + 2⋅γ - 1                 γ ⋅(α - 1) - γ⋅(α - 1) +

                                                               ⎫
 1)              -1                 -α⋅γ + (4⋅α - 3)⋅(γ - 1)   ⎪
──────, v_full: ─────, v_hungry: ──────────────────────────────⎬
             

==== v(饿) = q(饿, 不吃), v(饱) = q(饱, 不吃) ==== x = 0, y = 1 ====


⎧                                                               2            2
⎪             2⋅(β⋅γ - (2⋅β - 1)⋅(γ - 1))                  - β⋅γ  + 3⋅β⋅γ + γ 
⎨q_full_eat: ──────────────────────────────, q_full_none: ────────────────────
⎪             2                                              2          2     
⎩            γ ⋅(β - 1) - γ⋅(β - 1) + γ - 1               β⋅γ  - β⋅γ - γ  + 2⋅

                                                            2            2    
 - 1                   2                   4⋅α⋅γ - 4⋅α - β⋅γ  + 3⋅β⋅γ + γ  - 4
─────, q_hungry_eat: ─────, q_hungry_none: ───────────────────────────────────
                     γ - 1                            2          2            
γ - 1                                              β⋅γ  - β⋅γ - γ  + 2⋅γ - 1  

                                                               ⎫
⋅γ + 3           2⋅(β⋅γ - (2⋅β - 1)⋅(γ - 1))                2  ⎪
──────, v_full: ──────────────────────────────, v_hungry: ─────⎬
             

==== v(饿) = q(饿, 吃), v(饱) = q(饱, 不吃) ==== x = 1, y = 1 ====


⎧                                                                          2  
⎪                 -2⋅α⋅γ - β⋅γ + 4⋅β + 2⋅γ - 2                        - α⋅γ  -
⎨q_full_eat: ──────────────────────────────────────, q_full_none: ────────────
⎪               2            2          2                            2        
⎩            α⋅γ  - α⋅γ + β⋅γ  - β⋅γ - γ  + 2⋅γ - 1               α⋅γ  - α⋅γ +

                2                                       2            2        
 α⋅γ + 3⋅β⋅γ + γ  - 1                       -2⋅α⋅γ + β⋅γ  + 2⋅β⋅γ - γ  - γ + 2
──────────────────────────, q_hungry_eat: ────────────────────────────────────
    2          2                             2            2          2        
 β⋅γ  - β⋅γ - γ  + 2⋅γ - 1                α⋅γ  - α⋅γ + β⋅γ  - β⋅γ - γ  + 2⋅γ -

                                                                              
                       2⋅α⋅γ - 4⋅α + 3⋅β⋅γ - 3⋅γ + 3                    -2⋅α⋅γ
──, q_hungry_none: ───────────────────────────────