In [1]:
%load_ext autoreload
%autoreload 2

In [6]:
import numpy as np
from rl_envs_forge.envs.network_graph.network_graph import NetworkGraph

connectivity_matrix = np.array(
    [
        # 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
        [0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],  # 1
        [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 2
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 3
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 4
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 5
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],  # 6
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],  # 7
        [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 8
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 9
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0],  # 10
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0],  # 11
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],  # 12
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 13
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # 14
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # 15
    ]
)

desired_centrality = np.array(
    [0.08, # 1
    0.06, # 2
    0.14, # 3
    0.08, # 4
    0.04, # 5
    0.02, # 6
    0.06, # 7
    0.03, # 8
    0.04, # 9
    0.08, # 10
    0.13, # 11
    0.06, # 12
    0.04, # 13
    0.07, # 14
    0.07] # 15
)  # Example centrality

initial_opinions = np.linspace(0, 1, len(desired_centrality))

env = NetworkGraph(
    connectivity_matrix=connectivity_matrix,
    desired_centrality=desired_centrality,
    initial_opinions=initial_opinions,
    max_u=0.1,
    budget=10.0,
    desired_opinion=1.0,
    tau=1.0,
    max_steps=100
)

# env.render(mode="matplotlib")
env.reset()

array([0.        , 0.07142857, 0.14285714, 0.21428571, 0.28571429,
       0.35714286, 0.42857143, 0.5       , 0.57142857, 0.64285714,
       0.71428571, 0.78571429, 0.85714286, 0.92857143, 1.        ])

In [10]:
def optimal_control_strategy(env):
    """
    Implement the optimal control strategy using precomputed centralities and influence powers.

    Args:
        env (NetworkGraph): The NetworkGraph environment.
        
    Returns:
        np.ndarray: The control input vector to be applied.
    """
    # Use the precomputed centralities as the influence factors
    centralities = env.centralities
    opinions = env.opinions
    desired_opinion = env.desired_opinion
    
    # Calculate influence power for each agent
    influence_powers = centralities * np.abs(opinions - desired_opinion)
    
    # Sort agents based on influence power in descending order
    sorted_indices = np.argsort(influence_powers)[::-1]
    
    # Initialize control inputs
    u = np.zeros(env.num_agents)
    remaining_budget = env.max_u * env.num_agents
    
    for idx in sorted_indices:
        if remaining_budget <= 0:
            break
        if u[idx] < env.max_u:
            u[idx] = min(env.max_u, remaining_budget)
            remaining_budget -= u[idx]

    return u

# Applying the optimal control strategy in the environment
optimal_u = optimal_control_strategy(env)
print(optimal_u)

opinions, reward, done, truncated, info = env.step(optimal_u)
opinions

[0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1]


array([0.        , 0.        , 1.        , 1.        , 1.        ,
       0.        , 0.        , 0.47311003, 1.        , 1.        ,
       0.76480887, 1.        , 0.14941828, 0.42669282, 0.37241355])

In [11]:
import numpy as np
from rl_envs_forge.envs.network_graph.network_graph import NetworkGraph

connectivity_matrix = np.array(
    [
        # 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
        [0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],  # 1
        [0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 2
        [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 3
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 4
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 5
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],  # 6
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0],  # 7
        [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 8
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # 9
        [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0],  # 10
        [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0],  # 11
        [0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1],  # 12
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # 13
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # 14
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # 15
    ]
)

desired_centrality = np.array(
    [0.08, # 1
    0.06, # 2
    0.14, # 3
    0.08, # 4
    0.04, # 5
    0.02, # 6
    0.06, # 7
    0.03, # 8
    0.04, # 9
    0.08, # 10
    0.13, # 11
    0.06, # 12
    0.04, # 13
    0.07, # 14
    0.07] # 15
)  # Example centrality

initial_opinions = np.linspace(0, 1, len(desired_centrality))

env = NetworkGraph(
    connectivity_matrix=connectivity_matrix,
    desired_centrality=desired_centrality,
    initial_opinions=initial_opinions,
    max_u=0.1,
    budget=10.0,
    desired_opinion=1.0,
    tau=1.0,
    max_steps=100
)

# env.render(mode="matplotlib")
env.reset()

def dynamic_programming_strategy(env, M, Q):
    """
    Implement the dynamic programming algorithm to determine the optimal budget allocation across campaigns.

    Args:
        env (NetworkGraph): The NetworkGraph environment.
        M (int): The number of campaigns.
        Q (int): The total budget.

    Returns:
        np.ndarray: The optimal budget allocation for each campaign.
    """
    N = env.num_agents  # Number of agents
    ubar = env.max_u  # Maximum control input
    v_rho = env.centralities  # Influence vector (precomputed centralities)
    x_t0 = env.opinions  # Initial opinions
    d = env.desired_opinion  # Desired opinion
    
    # Function f0(b0) for the first campaign
    def f0(b0, v_rho, x_t0, d, ubar):
        term1 = np.sum(v_rho[:b0] * (1 - ubar) * np.abs(x_t0[:b0] - d))
        term2 = np.sum(v_rho[b0:] * np.abs(x_t0[b0:] - d))
        return term1 + term2

    # Function f(b) for subsequent campaigns
    def f(b, v_rho, ubar):
        return 1 - ubar * np.sum(v_rho[:b])

    # Initialize the value function Vk
    V = np.zeros((M + 1, Q + 1))  # V[k, r] represents Vk(rk)

    # Base case: the last campaign
    for r in range(Q + 1):
        V[M, r] = np.log(f(r, v_rho, ubar))

    # Backward pass for intermediate campaigns
    for k in range(M - 1, -1, -1):
        for r in range(Q + 1):
            V[k, r] = np.min([np.log(f(b, v_rho, ubar)) + V[k + 1, r - b] for b in range(min(N, r) + 1)])

    # Initial campaign calculation
    V0 = np.min([np.log(f0(b0, v_rho, x_t0, d, ubar)) + V[1, Q - b0] for b0 in range(min(N, Q) + 1)])

    # Forward pass to find the optimal budget allocations
    b_star = np.zeros(M + 1, dtype=int)
    b_star[0] = np.argmin([np.log(f0(b0, v_rho, x_t0, d, ubar)) + V[1, Q - b0] for b0 in range(min(N, Q) + 1)])

    for k in range(1, M):
        b_star[k] = np.argmin([np.log(f(b, v_rho, ubar)) + V[k + 1, Q - np.sum(b_star[:k]) - b] for b in range(min(N, Q - np.sum(b_star[:k])) + 1)])

    b_star[M] = Q - np.sum(b_star[:M])

    return b_star

# Example usage with the environment
M = 5  # Number of campaigns
Q = 20  # Total budget

optimal_budget_allocation = dynamic_programming_strategy(env, M, Q)
print("Optimal budget allocation for each campaign:", optimal_budget_allocation)

# Apply the first step of the optimal control strategy
optimal_u = env.max_u * optimal_budget_allocation[0] * env.centralities / sum(env.centralities)
opinions, reward, done, truncated, info = env.step(optimal_u)
print("Opinions after first campaign:", opinions)


Optimal budget allocation for each campaign: [4 3 3 3 3 4]
Opinions after first campaign: [1.         0.59143128 0.04959889 0.         0.         1.
 1.         0.47332365 0.         0.6060361  0.7279755  0.5656297
 0.59444847 0.84665071 0.55004193]
