In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
pivot_df = pd.read_csv('synthetic_scenario_three.csv')
# Set parameters
penalty = -0.5  # Penalty for polling when difference <= theta
aoii_penalty = 0.5
theta = 0.5  # Threshold for reward
beta_1 = 0.3  # dEWMA parameter for state value
beta_2 = 0.3  # dEWMA parameter for rate of change
columns = pivot_df.columns  # Get the column names dynamically
num_nodes = len(columns)  # Number of nodes based on dataset columns
num_time_steps = len(pivot_df)  # Total time steps based on dataset length

# Function to calculate Age of Incorrect Information (AoII) at the sink
def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    return abs((current_time - last_received_time) * last_rate_of_change)

# Helper function to update state using dEWMA
def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    x1 = beta_1 * measured_value + (1 - beta_1) * (last_state_value + last_rate_of_change * delta_t)
    x2 = beta_2 * (x1 - last_state_value) / delta_t + (1 - beta_2) * last_rate_of_change
    return x1, x2

# Helper function to calculate reward
def calculate_reward(measured_value, last_state_value, theta, penalty):
    if abs(measured_value - last_state_value) > theta:
        return 1  # Reward
    return penalty  # Penalty

# Main function to simulate Whittle AoII with rewards
def run_simulation_whittle_aoii(pivot_df, columns, M, theta, penalty, aoii_penalty):
    cumulative_reward = 0  # Track total cumulative reward
    cumulative_rewards = []  # Store cumulative average reward over time
    last_update_times = {col: 0 for col in columns}  # Last update time for each node
    state_node = {col: np.array([20.0, 0.1]) for col in columns}  # Node states

    for t in range(len(pivot_df)):
        # Step 1: Compute Whittle indices for each node based on AoII
        whittle_indices = {}
        for col in columns:
            delta_t_dynamic = t - last_update_times[col]  # Time since last update
            last_state_value, last_rate_of_change = state_node[col]
            measured_value = pivot_df.loc[t, col]

            # Correct AoII calculation at the sink using rate of change
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            future_aoii_passive = current_aoii + delta_t_dynamic  # Updated AoII increase assumption for passive
            future_aoii_active = 0  # AoII resets to 0 if polled

            # Whittle index calculations
            q_passive = current_aoii + future_aoii_passive
            q_active = current_aoii + future_aoii_active + aoii_penalty
            whittle_indices[col] = q_passive - q_active

        # Step 2: Select top M nodes to poll based on Whittle indices
        nodes_to_poll = [col for col in whittle_indices if whittle_indices[col] >= 0]
        if len(nodes_to_poll) > M:
            nodes_to_poll = sorted(nodes_to_poll, key=whittle_indices.get, reverse=True)[:M]

        # Step 3: Poll selected nodes and calculate rewards
        for col in nodes_to_poll:
            measured_value = pivot_df.loc[t, col]
            last_state_value, last_rate_of_change = state_node[col]

            # Calculate reward after polling
            reward = calculate_reward(measured_value, last_state_value, theta, penalty)
            cumulative_reward += reward  # Update cumulative reward

            # Update node state and last update time
            state_node[col] = update_node_state_dewma(
                measured_value, last_state_value, last_rate_of_change, delta_t=1, beta_1=beta_1, beta_2=beta_2
            )
            last_update_times[col] = t

        # Step 4: Calculate cumulative average reward
        cumulative_rewards.append(cumulative_reward / (t + 1))

    return cumulative_rewards

# Run the simulation
cumulative_rewards_whittle = run_simulation_whittle_aoii(pivot_df, columns, M, theta, penalty, aoii_penalty)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_whittle, columns=["cumulative_reward"]).to_csv('cumulative_rewards_whittle_aoii.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_whittle) + 1), cumulative_rewards_whittle, label='Whittle AoII Cumulative Average Reward')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Whittle AoII Cumulative Average Reward over Time', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_whittle))

# Save the plot
plt.savefig('whittle_aoii_cumulative_average_reward.png')

plt.show()


NameError: name 'M' is not defined

In [None]:
def run_simulation_with_rate_of_change_logging(pivot_df, num_nodes, M, theta, penalty, covariance_threshold):
    """Runs the simulation using Kalman filter (1D) with two-step pooling and logs rate of change."""
    
    state_estimates = {f'mote{i}': np.array([[20], [0.1]]) for i in range(1, num_nodes + 1)}
    P = {f'mote{i}': np.zeros((2, 2)) for i in range(1, num_nodes + 1)}  # Initialize covariance
    last_observations = {f'mote{i}': 0.0 for i in range(1, num_nodes + 1)}  # Last observed values
    last_update_times = {f'mote{i}': 0 for i in range(1, num_nodes + 1)}  # Last measurement times
    
    cumulative_reward = 0
    cumulative_rewards = []  # To store cumulative rewards over time
    rate_of_change_log = []  # Log for rate of change

    def predict_node_state(x_hat, delta_t):
        A = get_state_transition_matrix(delta_t)
        return A @ x_hat

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # Print rate of change for the first 20 time steps
        if current_time_step < 20:
            for mote, state in state_estimates.items():
                rate_of_change = state[1, 0]  # Extract rate of change from the state
                print(f"Time Step {current_time_step}, Node {mote}, Rate of Change: {rate_of_change}")

        # Step 1: Select nodes based on highest covariance
        covariance_traces = {mote: np.trace(P[mote]) for mote in state_estimates}
        sorted_motes_by_trace = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        top_covariance_nodes = [mote for mote in sorted_motes_by_trace if covariance_traces[mote] > covariance_threshold][:M]

        # Step 2: Filter nodes with significant prediction error
        nodes_to_poll = []
        for mote in top_covariance_nodes:
            column_name = mote
            if column_name in row:
                delta_t = max(current_time_step - last_update_times[column_name], 1)
                predicted_state = predict_node_state(state_estimates[column_name], delta_t)
                last_observed_value = last_observations[column_name]
                prediction_error = abs(last_observed_value - predicted_state[0, 0])

                # Include node only if prediction error exceeds threshold
                if prediction_error > theta:
                    nodes_to_poll.append(mote)

        total_reward_for_round = 0

        # Process selected nodes for reward calculation
        for mote in nodes_to_poll:
            column_name = mote
            measured_value = row[column_name]  # Current measurement
            last_observed_value = last_observations[column_name]  # Last observed value

            # Reward or penalty based on actual measurement
            if abs(last_observed_value - measured_value) > theta:
                total_reward_for_round += 1  # Reward
            else:
                total_reward_for_round += penalty  # Penalty

            # Update Kalman filter
            previous_state = state_estimates[column_name]
            delta_t = max(current_time_step - last_update_times[column_name], 1)
            A = get_state_transition_matrix(delta_t)
            Pp = A @ P[column_name] @ A.T + Q
            xp = A @ previous_state
            z = np.array([[measured_value]])
            K = Pp @ H.T @ np.linalg.inv(H @ Pp @ H.T + R)
            x_hat = xp + K @ (z - H @ xp)
            P_hat = Pp - K @ H @ Pp

            # Update state estimates and covariance
            state_estimates[column_name] = x_hat
            P[column_name] = P_hat
            last_observations[column_name] = measured_value
            last_update_times[column_name] = current_time_step

        # Update cumulative rewards
        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (idx + 1))

    return cumulative_rewards


In [None]:
H = np.array([[1, 0]])                # Measurement matrix
Q = np.array([[1e-5, 0], [0, 1e-5]])  # Process noise covariance
R = np.array([[0.5]])                 # Measurement noise covariance

def get_state_transition_matrix(delta_t):
    return np.array([[1, delta_t], [0, 1]])

def run_simulation_with_two_step_pooling(pivot_df, num_nodes, M, theta, penalty, covariance_threshold):
    """Runs the simulation using Kalman filter (1D) with two-step pooling."""
    
    state_estimates = {f'mote{i}': np.array([[20], [0.1]]) for i in range(1, num_nodes + 1)}
    P = {f'mote{i}': np.zeros((2, 2)) for i in range(1, num_nodes + 1)}  # Initialize covariance
    last_update_times = {f'mote{i}': 0 for i in range(1, num_nodes + 1)}  # Last measurement times
    
    cumulative_reward = 0
    cumulative_rewards = []  # To store cumulative rewards over time
    polled_nodes_info = []  # Log for selected nodes and their states

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # Step 1: Select nodes based on highest covariance
        covariance_traces = {mote: np.trace(P[mote]) for mote in state_estimates}
        sorted_motes_by_trace = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        top_covariance_nodes = [mote for mote in sorted_motes_by_trace if covariance_traces[mote] > covariance_threshold][:M]

        # Step 2: Filter nodes based on the rate of change
        nodes_to_poll = []
        for mote in top_covariance_nodes:
            column_name = mote
            if column_name in row:
                delta_t = max(current_time_step - last_update_times[column_name], 1)
                previous_state = state_estimates[column_name]
                rate_of_change = abs(previous_state[1, 0])  # Extract rate of change
                projected_change = rate_of_change * delta_t

                # Include node only if projected change exceeds the threshold
                if projected_change > theta:
                    nodes_to_poll.append(mote)
                    polled_nodes_info.append({
                        "time_step": current_time_step,
                        "node": mote,
                        "projected_change": projected_change,
                        "covariance_trace": covariance_traces[mote]
                    })

        total_reward_for_round = 0

        # Process selected nodes for reward calculation
        for mote in nodes_to_poll:
            column_name = mote
            measured_value = row[column_name]  # Current measurement

            # Reward or penalty based on the actual measurement
            if abs(state_estimates[column_name][0, 0] - measured_value) > theta:
                total_reward_for_round += 1  # Reward
            else:
                total_reward_for_round += penalty  # Penalty

            # Update Kalman filter
            previous_state = state_estimates[column_name]
            delta_t = max(current_time_step - last_update_times[column_name], 1)
            A = get_state_transition_matrix(delta_t)
            Pp = A @ P[column_name] @ A.T + Q
            xp = A @ previous_state
            z = np.array([[measured_value]])
            K = Pp @ H.T @ np.linalg.inv(H @ Pp @ H.T + R)
            x_hat = xp + K @ (z - H @ xp)
            P_hat = Pp - K @ H @ Pp

            # Update state estimates and covariance
            state_estimates[column_name] = x_hat
            P[column_name] = P_hat
            last_update_times[column_name] = current_time_step

        # Update cumulative rewards
        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (idx + 1))

    # Save selected nodes information to CSV
    pd.DataFrame(polled_nodes_info).to_csv('polled_nodes_two_step_pooling.csv', index=False)

    return cumulative_rewards, pd.DataFrame(polled_nodes_info)

# Run simulation
cumulative_rewards, polled_nodes_info = run_simulation_with_two_step_pooling(
    pivot_df, num_nodes=50, M=10, theta=0.1, penalty=-0.5, covariance_threshold=0.5
)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards, columns=["cumulative_reward"]).to_csv('cumulative_rewards_two_step.csv', index=False)

# Plot cumulative average reward
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards) + 1), cumulative_rewards, label='Two-Step Pooling Reward')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Two-Step Pooling - Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards))

# Save the plot
plt.savefig('two_step_pooling_reward.png')
plt.show()


In [None]:
def run_simulation_kalman_with_covariance_check(pivot_df, num_nodes, M, theta, penalty, covariance_threshold):
    """Runs the simulation using Kalman filter (1D) and computes cumulative average reward with covariance threshold check."""
    
    # Initialize state estimates and covariance matrices (scalars)
    state_estimates = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initial state estimates
    P = {f'mote{i}': 0.5 for i in range(1, num_nodes + 1)}  # Initial covariance matrices (scalars)
    last_observations = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initialize last observed values
    last_update_times = {f'mote{i}': 0 for i in range(1, num_nodes + 1)}  # Time when the last measurement was received

    cumulative_reward = 0  # Track total cumulative reward
    cumulative_rewards = []  # Store cumulative average reward over time

    # List to log the selected nodes and their covariance error
    polled_nodes_info = []

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # **Step 1**: Select nodes with the highest covariance
        covariance_traces = {mote: P[f'mote{mote}'] for mote in range(1, num_nodes + 1)}
        sorted_motes_by_trace = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        top_covariance_nodes = sorted_motes_by_trace[:M]

        # **Step 2**: Further filter nodes with covariance > threshold
        nodes_to_poll = []
        for mote in top_covariance_nodes:
            column_name = f'mote{mote}'
            if column_name in row:
                covariance = P[column_name]  # Current covariance value

                # Include node if covariance > threshold
                if covariance > covariance_threshold:
                    nodes_to_poll.append(mote)

                    # Log the polled node and its covariance
                    polled_nodes_info.append({
                        "time_step": current_time_step,
                        "node": mote,
                        "covariance": covariance
                    })

        total_reward_for_round = 0

        # Process the selected nodes
        for mote in nodes_to_poll:
            column_name = f'mote{mote}'
            if column_name in row:
                measured_value = row[column_name]  # Current measurement
                last_observed_value = last_observations[column_name]  # Last observed value

                # Reward or penalty based on the difference between the last observation and the current measured value
                if abs(last_observed_value - measured_value) > theta:
                    total_reward_for_round += 1  # Reward
                else:
                    total_reward_for_round += penalty  # Penalty

                # Prediction step
                previous_state = state_estimates[column_name]
                previous_P = P[column_name]
                predicted_covariance = previous_P + Q  # Update covariance with process noise

                # Kalman filter update step
                K = predicted_covariance / (predicted_covariance + R)  # Kalman gain
                updated_state = previous_state + K * (measured_value - previous_state)  # Update state estimate
                updated_covariance = (1 - K) * predicted_covariance  # Update covariance

                # Update state estimates, covariance, and last observed value
                state_estimates[column_name] = updated_state
                P[column_name] = updated_covariance
                last_observations[column_name] = measured_value  # Update last observed value
                last_update_times[column_name] = current_time_step  # Update last update time

        # Update cumulative reward
        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (idx + 1))  # Compute cumulative average reward

    # Convert polled nodes info to a DataFrame for analysis
    polled_nodes_info_df = pd.DataFrame(polled_nodes_info)
    polled_nodes_info_df.to_csv('polled_nodes_covariance_check.csv', index=False)  # Save to CSV

    return cumulative_rewards, polled_nodes_info_df

# Parameters
M = 10  # Number of nodes to poll
theta = 0.5  # Threshold for significant difference
penalty = -0.5  # Penalty for insignificant difference
Q = 0.001  # Process noise covariance (scalar)
R = 0.1  # Measurement noise covariance (scalar)
covariance_threshold = 0.5  # Threshold for covariance error

# Run the simulation
cumulative_rewards_kalman, polled_nodes_info = run_simulation_kalman_with_covariance_check(
    pivot_df, num_nodes, M, theta, penalty, covariance_threshold
)

# View the polled nodes' info DataFrame
print(polled_nodes_info)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_kalman, columns=["cumulative_reward"]).to_csv('cumulative_rewards_kalman_covariance.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_kalman) + 1), cumulative_rewards_kalman, label='Kalman Filter with Covariance Check')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Kalman Filter with Covariance Check - Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_kalman))

# Save the plot
plt.savefig('kalman_covariance_check_reward.png')

plt.show()


In [None]:
 def run_simulation_kalman_with_covariance_check(pivot_df, num_nodes, M, theta, penalty, covariance_threshold):
    """Runs the simulation using Kalman filter (1D) and computes cumulative average reward with covariance threshold check."""
    
    # Initialize state estimates and covariance matrices (scalars)
    state_estimates = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initial state estimates
    P = {f'mote{i}': 1 for i in range(1, num_nodes + 1)}  # Initial covariance matrices (scalars)
    last_observations = {f'mote{i}': 0.0 for i in range(1, num_nodes + 1)}  # Initialize last observed values
    last_update_times = {f'mote{i}': 0 for i in range(1, num_nodes + 1)}  # Time when the last measurement was received

    cumulative_reward = 0  # Track total cumulative reward
    cumulative_rewards = []  # Store cumulative average reward over time

    # List to log the selected nodes and their covariance error
    polled_nodes_info = []

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # **Step 1**: Select nodes with the highest covariance
        covariance_traces = {mote: P[f'mote{mote}'] for mote in range(1, num_nodes + 1)}
        sorted_motes_by_trace = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        top_covariance_nodes = sorted_motes_by_trace[:M]

        # **Step 2**: Further filter nodes with covariance > threshold
        nodes_to_poll = []
        for mote in top_covariance_nodes:
            column_name = f'mote{mote}'
            if column_name in row:
                covariance = P[column_name]  # Current covariance value

                # Include node if covariance > threshold
                if covariance > covariance_threshold:
                    nodes_to_poll.append(mote)

                    # Log the polled node and its covariance
                    polled_nodes_info.append({
                        "time_step": current_time_step,
                        "node": mote,
                        "covariance": covariance
                    })

        total_reward_for_round = 0

        # Process the selected nodes
        for mote in nodes_to_poll:
            column_name = f'mote{mote}'
            if column_name in row:
                measured_value = row[column_name]  # Current measurement
                last_observed_value = last_observations[column_name]  # Last observed value

                # Reward or penalty based on the difference between the last observation and the current measured value
                if abs(last_observed_value - measured_value) > theta:
                    total_reward_for_round += 1  # Reward
                else:
                    total_reward_for_round += penalty  # Penalty

                # Prediction step
                previous_state = state_estimates[column_name]
                previous_P = P[column_name]
                predicted_covariance = previous_P + Q  # Update covariance with process noise

                # Kalman filter update step
                K = predicted_covariance / (predicted_covariance + R)  # Kalman gain
                updated_state = previous_state + K * (measured_value - previous_state)  # Update state estimate
                updated_covariance = (1 - K) * predicted_covariance  # Update covariance

                # Update state estimates, covariance, and last observed value
                state_estimates[column_name] = updated_state
                P[column_name] = updated_covariance
                last_observations[column_name] = measured_value  # Update last observed value
                last_update_times[column_name] = current_time_step  # Update last update time

        # Update cumulative reward
        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (idx + 1))  # Compute cumulative average reward

    # Convert polled nodes info to a DataFrame for analysis
    polled_nodes_info_df = pd.DataFrame(polled_nodes_info)
    polled_nodes_info_df.to_csv('polled_nodes_covariance_check.csv', index=False)  # Save to CSV

    return cumulative_rewards, polled_nodes_info_df

# Parameters
M = 10  # Number of nodes to poll
theta = 0.5  # Threshold for significant difference
penalty = -0.5  # Penalty for insignificant difference
Q = 0.001  # Process noise covariance (scalar)
R = 0.1  # Measurement noise covariance (scalar)
covariance_threshold = 0.5  # Threshold for covariance error

# Run the simulation
cumulative_rewards_kalman, polled_nodes_info = run_simulation_kalman_with_covariance_check(
    pivot_df, num_nodes, M, theta, penalty, covariance_threshold
)

# View the polled nodes' info DataFrame
print(polled_nodes_info)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_kalman, columns=["cumulative_reward"]).to_csv('cumulative_rewards_kalman_covariance.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_kalman) + 1), cumulative_rewards_kalman, label='Kalman Filter with Covariance Check')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Kalman Filter with Covariance Check - Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_kalman))

# Save the plot
plt.savefig('kalman_covariance_check_reward.png')

plt.show()

In [None]:
def run_simulation_kalman_with_two_step_pooling(pivot_df, num_nodes, M, theta, penalty):
    """Runs the simulation using Kalman filter (1D) and computes cumulative average reward with two-step pooling."""
    
    # Initialize state estimates and covariance matrices (scalars)
    state_estimates = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initial state estimates
    P = {f'mote{i}': 0.5 for i in range(1, num_nodes + 1)}  # Initial covariance matrices (scalars)
    last_observations = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initialize last observed values
    last_update_times = {f'mote{i}': 0 for i in range(1, num_nodes + 1)}  # Time when the last measurement was received

    cumulative_reward = 0  # Track total cumulative reward
    cumulative_rewards = []  # Store cumulative average reward over time

    # List to log the selected nodes and their MSE
    polled_nodes_info = []

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # **Step 1**: Select nodes with the highest covariance
        covariance_traces = {mote: P[f'mote{mote}'] for mote in range(1, num_nodes + 1)}
        sorted_motes_by_trace = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        top_covariance_nodes = sorted_motes_by_trace[:M]

        # **Step 2**: Further filter nodes by MSE threshold
        nodes_to_poll = []
        for mote in top_covariance_nodes:
            column_name = f'mote{mote}'
            if column_name in row:
                last_observed_value = last_observations[column_name]
                previous_state = state_estimates[column_name]

                # Predict next state
                predicted_state = previous_state  # No dynamics, so prediction is the previous state

                # Calculate MSE (difference between last observation and predicted state)
                mse = (last_observed_value - predicted_state) ** 2

                # Include the node for polling only if MSE > 0
                if mse > 0:
                    nodes_to_poll.append(mote)

                    # Log the polled node and its MSE
                    polled_nodes_info.append({
                        "time_step": current_time_step,
                        "node": mote,
                        "mse": mse,
                        "covariance": covariance_traces[mote]
                    })

        total_reward_for_round = 0

        # Process the selected nodes
        for mote in nodes_to_poll:
            column_name = f'mote{mote}'
            if column_name in row:
                measured_value = row[column_name]  # Current measurement
                last_observed_value = last_observations[column_name]  # Last observed value

                # Reward or penalty based on the difference between the last observation and the current measured value
                if abs(last_observed_value - measured_value) > theta:
                    total_reward_for_round += 1  # Reward
                else:
                    total_reward_for_round += penalty  # Penalty

                # Prediction step
                previous_state = state_estimates[column_name]
                previous_P = P[column_name]
                predicted_covariance = previous_P + Q  # Update covariance with process noise

                # Kalman filter update step
                K = predicted_covariance / (predicted_covariance + R)  # Kalman gain
                updated_state = previous_state + K * (measured_value - previous_state)  # Update state estimate
                updated_covariance = (1 - K) * predicted_covariance  # Update covariance

                # Update state estimates, covariance, and last observed value
                state_estimates[column_name] = updated_state
                P[column_name] = updated_covariance
                last_observations[column_name] = measured_value  # Update last observed value
                last_update_times[column_name] = current_time_step  # Update last update time

        # Update cumulative reward
        cumulative_reward += total_reward_for_round
        cumulative_rewards.append(cumulative_reward / (idx + 1))  # Compute cumulative average reward

    # Convert polled nodes info to a DataFrame for analysis
    polled_nodes_info_df = pd.DataFrame(polled_nodes_info)
    polled_nodes_info_df.to_csv('polled_nodes_two_step_pooling.csv', index=False)  # Save to CSV

    return cumulative_rewards, polled_nodes_info_df

# Parameters
M = 10  # Number of nodes to poll
theta = 0.5  # Threshold for significant difference
penalty = -0.5  # Penalty for insignificant difference
Q = 0.001  # Process noise covariance (scalar)
R = 0.1  # Measurement noise covariance (scalar)

# Run the simulation
cumulative_rewards_kalman, polled_nodes_info = run_simulation_kalman_with_two_step_pooling(
    pivot_df, num_nodes, M, theta, penalty
)

# View the polled nodes' info DataFrame
print(polled_nodes_info)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_kalman, columns=["cumulative_reward"]).to_csv('cumulative_rewards_kalman_two_step.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_kalman) + 1), cumulative_rewards_kalman, label='Kalman Filter with Two-Step Pooling')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Kalman Filter with Two-Step Pooling - Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_kalman))

# Save the plot
plt.savefig('kalman_two_step_pooling_reward.png')

plt.show()


In [4]:
# Function to simulate Whittle Index AoI with cumulative reward tracking
M=5
def run_simulation_whittle_aoi(pivot_df, num_nodes, M, theta, penalty):
    cumulative_reward = 0  # Track total cumulative reward
    cumulative_rewards = []  # Store cumulative average reward over time

    last_update_whittle = np.zeros(num_nodes)  # Last update times for each node
    last_polled_values = {f'mote{i}': 20.0 for i in range(1, num_nodes + 1)}  # Initial values for each node

    def transmission_success(prob):
        return np.random.rand() < prob

    for t in range(len(pivot_df)):  # Loop over all time steps
        whittle_indices = {}

        # Compute Whittle indices for each node
        for node in range(1, num_nodes + 1):
            column_name = f'mote{node}'
            delta_t_dynamic = t - last_update_whittle[node - 1]  # Time since last update (AoI)
            current_aoi = delta_t_dynamic
            future_aoi_passive = delta_t_dynamic + 1
            future_aoi_active = 0  # Assume perfect transmission for now

            q_passive = current_aoi + future_aoi_passive
            q_active = current_aoi + future_aoi_active + 0  # No penalty for active action
            whittle_indices[node] = q_passive - q_active

        # Poll top M nodes with the highest Whittle index
        nodes_to_poll = [node for node, index in whittle_indices.items() if index >= 0]
        if len(nodes_to_poll) > M:
            nodes_to_poll = sorted(nodes_to_poll, key=lambda node: whittle_indices[node], reverse=True)[:M]

        round_reward = 0  # Reward for the current time step

        for node in nodes_to_poll:
            column_name = f'mote{node}'
            current_value = pivot_df.loc[t, column_name]
            last_value = last_polled_values[column_name]

            # Calculate reward or penalty
            if abs(current_value - last_value) > theta:
                round_reward += 1  # Reward
            else:
                round_reward += penalty  # Penalty

            # Update the last polled value
            last_polled_values[column_name] = current_value
            last_update_whittle[node - 1] = t  # Update last update time

        # Update cumulative reward
        cumulative_reward += round_reward
        cumulative_rewards.append(cumulative_reward / (t + 1))  # Calculate cumulative average reward

    return cumulative_rewards

# Run the simulation
cumulative_rewards_whittle = run_simulation_whittle_aoi(pivot_df, num_nodes, M, theta, penalty)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_whittle, columns=["cumulative_reward"]).to_csv('cumulative_rewards_whittle.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_whittle) + 1), cumulative_rewards_whittle, label='Whittle AoII Cumulative Average Reward')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Whittle AoII Cumulative Average Reward over Time', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_whittle))

# Save the plot
plt.savefig('whittle_aoi_cumulative_average_reward.png')

plt.show()


KeyError: 'mote51'

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



# Parameters
columns = pivot_df.columns  # Get the actual column names from the dataset
num_nodes = len(columns)  # Total number of nodes based on columns in the dataset
#M = 10  # Maximum number of nodes to poll in each time step
theta = 0.5  # Threshold for significant difference
penalty = -0.5  # Penalty for insignificant difference
initial_value = 20  # Initial value for last polled
num_time_steps = len(pivot_df)  # Total time steps based on the dataset

# Initialize last polled values
last_polled_values_rr = {column: initial_value for column in columns}

def run_simulation_round_robin(pivot_df, columns, M, theta, penalty):
    last_polled_values = {column: initial_value for column in columns}  # Last polled values
    cumulative_reward = 0  # Total cumulative reward
    cumulative_rewards = []  # Store cumulative rewards over time

    for t in range(len(pivot_df)):  # Loop over all time steps
        # Poll M nodes in cyclic order
        nodes_rr = [columns[(t + i) % num_nodes] for i in range(M)]  # Poll M nodes in cyclic order
        round_reward = 0  # Reward for the current time step

        for node_rr in nodes_rr:
            current_value = pivot_df.loc[t, node_rr]  # Current value for the node
            last_value = last_polled_values[node_rr]

            # Calculate reward or penalty
            if abs(current_value - last_value) > theta:
                round_reward += 1  # Reward
            else:
                round_reward += penalty  # Penalty

            # Update the last polled value
            last_polled_values[node_rr] = current_value

        # Update cumulative reward
        cumulative_reward += round_reward
        cumulative_rewards.append(cumulative_reward / (t + 1))  # Append cumulative average reward

    return cumulative_rewards

# Run Round Robin simulation with perfect transmission
cumulative_rewards_rr = run_simulation_round_robin(pivot_df, columns, M, theta, penalty)

# Save cumulative rewards to CSV
pd.DataFrame(cumulative_rewards_rr, columns=["cumulative_reward"]).to_csv('cumulative_rewards_round_robin.csv', index=False)

# Plot cumulative average reward over time
plt.figure(figsize=(10, 5))
plt.plot(range(1, len(cumulative_rewards_rr) + 1), cumulative_rewards_rr, label='Round Robin Cumulative Average Reward')
plt.xlabel('Time Steps', fontsize=14, fontweight='bold')
plt.ylabel('Cumulative Average Reward', fontsize=14, fontweight='bold')
plt.title('Round Robin Cumulative Average Reward over Time', fontsize=14, fontweight='bold')
plt.legend()
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.tick_params(axis='both', which='major', labelsize=12)
plt.xlim(0, len(cumulative_rewards_rr))

# Save the plot
plt.savefig('round_robin_cumulative_average_reward.png')

plt.show()

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load cumulative rewards from CSV files
cumulative_rewards_oracle = pd.read_csv('cumulative_rewards_oracle.csv')['cumulative_reward'].values
cumulative_rewards_rr = pd.read_csv('cumulative_rewards_round_robin.csv')['cumulative_reward'].values
cumulative_rewards_whittle_aoii = pd.read_csv('cumulative_rewards_whittle_aoii.csv')['cumulative_reward'].values
cumulative_rewards_whittle_aoi = pd.read_csv('cumulative_rewards_whittle_aoi.csv')['cumulative_reward'].values
cumulative_rewards_kalman = pd.read_csv('cumulative_rewards_kalman.csv')['cumulative_reward'].values

# Calculate cumulative average rewards for each technique
cumulative_average_reward_oracle = np.cumsum(cumulative_rewards_oracle) / (np.arange(len(cumulative_rewards_oracle)) + 1)
cumulative_average_reward_rr = np.cumsum(cumulative_rewards_rr) / (np.arange(len(cumulative_rewards_rr)) + 1)
cumulative_average_reward_waoii = np.cumsum(cumulative_rewards_whittle_aoii) / (np.arange(len(cumulative_rewards_whittle_aoii)) + 1)
cumulative_average_reward_waoi = np.cumsum(cumulative_rewards_whittle_aoi) / (np.arange(len(cumulative_rewards_whittle_aoi)) + 1)
cumulative_average_reward_kalman = np.cumsum(cumulative_rewards_kalman) / (np.arange(len(cumulative_rewards_kalman)) + 1)

# Calculate regret for each technique by subtracting from the oracle's cumulative average reward
regret_rr = cumulative_average_reward_oracle - cumulative_average_reward_rr
regret_waoii = cumulative_average_reward_oracle - cumulative_average_reward_waoii
regret_waoi = cumulative_average_reward_oracle - cumulative_average_reward_waoi
regret_kalman = cumulative_average_reward_oracle - cumulative_average_reward_kalman

# Plot the regret over time
plt.figure(figsize=(8, 6))
plt.plot(range(len(regret_rr)), regret_rr, label='RR Regret', linewidth=2)
plt.plot(range(len(regret_waoii)), regret_waoii, label='W-AoII Regret', linewidth=2)
plt.plot(range(len(regret_waoi)), regret_waoi, label='W-AoI Regret', linewidth=2)
plt.plot(range(len(regret_kalman)), regret_kalman, label='KF Regret', linewidth=2)

# Label the plot
plt.xlabel('Time Step', fontsize=16)
plt.ylabel('Regret', fontsize=16)
plt.title('Regret Comparison Across Techniques', fontsize=16, fontweight='bold')
plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', linewidth=0.7)
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xlim(0, 5000)  # Adjust this limit as needed based on your data

# Save the plot
plt.savefig('comparison_regret_over_time.png')
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'cumulative_rewards_oracle.csv'