In [2]:
import pandas as pd

# Scenario selection: swap which line is commented out to replay a different dataset.
#pivot_df = pd.read_csv('synthetic_scenario_one.csv')
pivot_df = pd.read_csv('synthetic_scenario_three.csv')

# Fill missing values with the column mean.
# DataFrame.fillna aligns the Series of means on column labels, so this is the
# vectorised form of a per-column apply. numeric_only=True keeps a non-numeric
# column (e.g. a timestamp) from making the mean computation fail; such columns
# are simply left untouched.
pivot_df = pivot_df.fillna(pivot_df.mean(numeric_only=True))

# Parameters
M = 2  # Maximum number of nodes that can be polled per time step
theta = 0.5  # Threshold for reward condition: a change must exceed this to count
penalty = -1.0  # Penalty for polling when difference is <= theta
initial_value = 20  # Initial estimate for last polled values
aoii_penalty = 1.0  # NOTE(review): not referenced in the code visible here - confirm where it is used
reward = 1.0  # Reward credited for each polled node whose change exceeds theta

# Initialize last polled values: every mote (mote1..mote50) starts at the same estimate.
last_polled_values = {f'mote{i}': initial_value for i in range(1, 51)}

def run_oracle_simulation(M):
    """Simulate an oracle polling policy and return its running average reward.

    At every time step the oracle looks at the true current reading of each
    mote, polls up to ``M`` motes whose reading differs from the value last
    polled from them by more than ``theta`` (largest difference first), and
    earns ``reward`` for each polled mote. The per-step total is divided by
    ``M`` before being accumulated.

    Reads the module-level names ``pivot_df`` (one row per time step, one
    column per mote), ``theta``, ``reward`` and ``last_polled_values`` (the
    initial estimate for each mote; its keys define which motes are simulated).

    Args:
        M: Maximum number of motes polled per time step. Values <= 0 mean
            that nothing is polled.

    Returns:
        A list with one entry per row of ``pivot_df``: the cumulative reward
        up to and including that step divided by the number of steps so far.
    """
    # Private copy: the estimates must evolve during the run, but the shared
    # module-level dict stays untouched so repeated calls start identically.
    last_values = dict(last_polled_values)
    budget = max(M, 0)  # a negative M would otherwise slice from the wrong end

    cumulative_reward = 0  # Total cumulative reward
    cumulative_rewards = []  # Running average reward after each time step

    for t in range(len(pivot_df)):
        # Gap between the true current reading and the value last polled.
        gaps = {
            mote: abs(pivot_df.loc[t, mote] - last_value)
            for mote, last_value in last_values.items()
        }

        # Only motes whose gap exceeds the threshold are worth polling.
        eligible = {mote: gap for mote, gap in gaps.items() if gap > theta}

        # Poll the motes with the largest gaps, at most `budget` of them.
        polled = sorted(eligible, key=eligible.get, reverse=True)[:budget]

        total_reward_for_round = 0
        for mote in polled:
            # Every polled mote already passed the `> theta` filter, so it
            # always earns the reward; no penalty case can occur here.
            total_reward_for_round += reward
            # Remember what was just read so later steps measure change
            # relative to this poll rather than to the initial estimate.
            last_values[mote] = pivot_df.loc[t, mote]

        # Divide total reward for the round by M to standardize
        if M > 0:  # Avoid division by zero
            total_reward_for_round /= M

        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (t + 1))

    return cumulative_rewards


In [None]:
# Main function to simulate Whittle AoII with rewards
def run_simulation_whittle_aoii(pivot_df, columns, M, theta, penalty):
    """Run the Whittle-index AoII polling policy and return its running average reward.

    For every row of ``pivot_df`` an index is computed per node, the ``M``
    nodes with the highest index are polled, each poll is scored with
    ``calculate_reward`` and the node's stored state is refreshed with
    ``update_node_state_dewma``.

    Relies on names defined outside this function: ``np``,
    ``calculate_reward``, ``update_node_state_dewma``, ``beta_1`` and
    ``beta_2``.

    Args:
        pivot_df: DataFrame indexed by time step (looked up with ``.loc[t, col]``
            for ``t`` in ``range(len(pivot_df))``), one column per node.
        columns: Node/column names to simulate.
        M: Number of nodes polled per time step.
        theta: Threshold forwarded to ``calculate_reward``.
        penalty: Penalty forwarded to ``calculate_reward`` and added to the
            active-action value in the index computation.

    Returns:
        A list with one entry per time step: cumulative reward so far divided
        by the number of steps so far.

    NOTE(review): rewards are summed per polled node and are not divided by M,
    whereas run_oracle_simulation divides each round by M - confirm that the
    two curves are meant to be on the same scale.
    """
    polled_at = {name: 0 for name in columns}  # step at which each node was last polled
    node_state = {name: np.array([20.0, 0.1]) for name in columns}  # [value, rate of change]
    running_total = 0
    averages = []

    def index_for(name, step):
        # Index = value of staying passive minus value of polling now.
        elapsed = step - polled_at[name]
        estimate, _rate = node_state[name]
        reading = pivot_df.loc[step, name]

        aoii_now = abs(reading - estimate)  # Example AoII calculation
        aoii_if_idle = aoii_now + elapsed  # AoII is assumed to keep growing while passive
        aoii_if_polled = 0  # AoII resets to 0 if polled
        value_idle = aoii_now + aoii_if_idle
        value_polled = aoii_now + aoii_if_polled + penalty
        return value_idle - value_polled

    for step in range(len(pivot_df)):
        # Step 1: index every node for this time step
        priority = {name: index_for(name, step) for name in columns}

        # Step 2: the M highest-index nodes get polled
        chosen = sorted(priority, key=priority.get, reverse=True)[:M]

        # Step 3: score each poll, then refresh that node's state
        for name in chosen:
            reading = pivot_df.loc[step, name]
            estimate, rate = node_state[name]

            running_total += calculate_reward(reading, estimate, theta, penalty)

            # NOTE(review): delta_t is fixed at 1 rather than the time elapsed
            # since the node's previous poll - confirm this is intended.
            node_state[name] = update_node_state_dewma(
                reading, estimate, rate, delta_t=1, beta_1=beta_1, beta_2=beta_2
            )
            polled_at[name] = step

        # Step 4: running average of the cumulative reward
        averages.append(running_total / (step + 1))

    return averages
