In [18]:
import numpy as np

def s_rectangle(nominal_P, radius):
    """
    S-rectangle 不確実性集合を生成する関数

    Args:
        nominal_P (np.ndarray): 名目上の遷移確率 (|S| x |A| x |S| の形状)
        radius (float): 不確実性集合の半径 (L1ノルム)

    Returns:
        list: S-rectangle 不確実性集合に含まれる遷移確率のリスト
    """

    num_states = nominal_P.shape[0]
    num_actions = nominal_P.shape[1]
    uncertainty_set = []

    # 簡単な例として、名目上の遷移確率から半径 radius 以内にある遷移確率をランダムに生成
    for _ in range(10): # 10個の遷移確率を生成（例）
        P = nominal_P.copy()
        for s in range(num_states):
            perturbation = np.random.uniform(-radius, radius, size=(num_actions, num_states))
            P[s] += perturbation

            # 各行動における遷移確率が確率分布であることを保証
            P[s] = np.clip(P[s], 0, 1)  # 0以下、1以上をクリップ
            for a in range(num_actions):
                P[s, a] /= np.sum(P[s, a])

        uncertainty_set.append(P)


    return uncertainty_set


# 使用例:
num_states = 2
num_actions = 3
nominal_P = np.random.dirichlet(np.ones(num_states), size=(num_states, num_actions)) # ランダムに名目遷移確率を生成
radius = 0.1
uncertainty_set_0 = s_rectangle(nominal_P, radius)



In [22]:
import numpy as np

def sa_rectangle(nominal_P, radius):
    """
    S,A-rectangle 不確実性集合を生成する関数

    Args:
        nominal_P (np.ndarray): 名目上の遷移確率 (|S| x |A| x |S| の形状)
        radius (float): 不確実性集合の半径 (L1ノルム)

    Returns:
        list: S,A-rectangle 不確実性集合に含まれる遷移確率のリスト
    """


    num_states = nominal_P.shape[0]
    num_actions = nominal_P.shape[1]
    uncertainty_set = []

    # 簡単な例として、名目上の遷移確率から半径 radius 以内にある遷移確率をランダムに生成
    for _ in range(10): # 10個の遷移確率を生成（例）
        P = nominal_P.copy()
        for s in range(num_states):
            for a in range(num_actions):
                perturbation = np.random.uniform(-radius, radius, size=num_states)
                P[s, a] += perturbation

                # 遷移確率が確率分布であることを保証
                P[s, a] = np.clip(P[s, a], 0, 1)
                P[s, a] /= np.sum(P[s, a])

        uncertainty_set.append(P)


    return uncertainty_set


# 使用例:
num_states = 2
num_actions = 3
nominal_P = np.random.dirichlet(np.ones(num_states), size=(num_states, num_actions)) # ランダムに名目遷移確率を生成

radius = 0.1
uncertainty_set = sa_rectangle(nominal_P, radius)


print(uncertainty_set_0)
print(uncertainty_set)

[array([[[0.16640855, 0.83359145],
        [0.63529639, 0.36470361],
        [0.33590271, 0.66409729]],

       [[0.87729351, 0.12270649],
        [0.64995003, 0.35004997],
        [0.65757843, 0.34242157]]]), array([[[0.12395176, 0.87604824],
        [0.63098724, 0.36901276],
        [0.44974272, 0.55025728]],

       [[0.88635245, 0.11364755],
        [0.62637315, 0.37362685],
        [0.62932562, 0.37067438]]]), array([[[0.21316077, 0.78683923],
        [0.71156958, 0.28843042],
        [0.26915698, 0.73084302]],

       [[1.        , 0.        ],
        [0.64919538, 0.35080462],
        [0.64584094, 0.35415906]]]), array([[[0.10725714, 0.89274286],
        [0.66374296, 0.33625704],
        [0.41140388, 0.58859612]],

       [[0.94819997, 0.05180003],
        [0.5939826 , 0.4060174 ],
        [0.5984847 , 0.4015153 ]]]), array([[[0.14328782, 0.85671218],
        [0.59548626, 0.40451374],
        [0.38516969, 0.61483031]],

       [[0.97638473, 0.02361527],
        [0.69199338, 0.30

In [1]:
import numpy as np

def s_rectangle(nominal_P, radius):
    # ... (s_rectangle関数の定義は省略)

def sa_rectangle(nominal_P, radius):
    # ... (sa_rectangle関数の定義は省略)

def project_to_simplex(y):
    """確率単体への射影"""
    n = len(y)
    u = np.sort(y)[::-1]
    cssv = np.cumsum(u)
    rho = np.nonzero(u * np.arange(1, n + 1) > (cssv - 1))[0][-1]
    theta = (cssv[rho] - 1) / (rho + 1)
    return np.maximum(y - theta, 0)

def subroutine_A(b0, epsilon, T, alpha, J_hat, J_hat_partial, num_states, num_actions, num_constraints, P, c0, c1, b1, gamma, rho):
    """サブルーチン A(b0)"""

    # 方策の初期化 (一様分布)
    pi = np.ones((num_states, num_actions)) / num_actions



    def calculate_return(pi_current, c_current, p_current):
        v = np.zeros(num_states)
        for s in range(num_states):
            for a in range(num_actions):
                for s_prime in range(num_states):
                    v[s] += pi_current[s, a] * p_current[s, a, s_prime] * (c_current[s, a] + gamma * v[s_prime])
        return rho @ v


    def worst_case_return(pi_current, c_current, uncertainty_set):

        return max([calculate_return(pi_current, c_current, p) for p in uncertainty_set])

    def J_n(pi_current, c_current, n):
        if n == 0: # n=0は目的関数、それ以外は制約関数
            uncertainty_set = sa_rectangle(P,0.1) # 例としてSA-rectangleを使用。パラメータは適宜調整
        else:
            uncertainty_set = sa_rectangle(P,0.1)  # 制約に対応する不確実性集合 (適宜変更)
        return worst_case_return(pi_current, c_current, uncertainty_set)

    def grad_J(pi_current, c_current, n):
        if n == 0:
            uncertainty_set = sa_rectangle(P, 0.1)
        else:
            uncertainty_set = sa_rectangle(P, 0.1)  # 制約に対応する不確実性集合 (適宜変更)

        worst_p = None
        max_return = -1

        for p in uncertainty_set: # 各遷移確率でリターンを計算し、最悪ケースを記録
            current_return = calculate_return(pi_current, c_current, p)
            if current_return > max_return:
                max_return = current_return
                worst_p = p


        grad = np.zeros_like(pi_current)
        for s in range(num_states):
            for a in range(num_actions):
                grad[s, a] = (1/(1-gamma)) * rho[s] * calculate_return(pi_current,c_current,worst_p)
        return grad

    def delta_b0(pi_current, b0_current):
        return max(J_n(pi_current, c0, 0) - b0_current, J_n(pi_current, c1, 1) - b1)

    for t in range(T):
        # 制約nの選択
        n_t = np.argmax([J_n(pi,c0,0) - b0 if n==0 else J_n(pi,c1,n) - b1 for n in range(num_constraints + 1) ])


        # 劣勾配の推定

        g_t = grad_J(pi,c0 if n_t == 0 else c1,n_t)

        # 方策の更新
        pi_update = pi - alpha * g_t
        for s in range(num_states):
            pi_update[s] = project_to_simplex(pi_update[s])  # 各状態ごとに確率単体へ射影

        pi = pi_update

    return pi



# 使用例（他のパラメータ、RCMDPの設定は上記の例と同様):
epsilon = 0.01
T = 100
alpha = 0.1
b0 = 0.8



# J_hat, J_hat_partial は仮の関数として定義
J_hat = lambda pi, n: 0  # ダミー
J_hat_partial = lambda pi, n: np.zeros((num_states, num_actions)) # ダミー


optimal_pi = subroutine_A(b0, epsilon, T, alpha, J_hat, J_hat_partial, num_states, num_actions, num_constraints, P, c0, c1, b1, gamma, rho)
print(optimal_pi)

IndentationError: expected an indented block after function definition on line 3 (4089038156.py, line 6)