In [None]:

# Load dataset and preprocess
# NOTE(review): this cell has no import statements of its own; it relies on
# `pd` (and `np`, used by the functions below) already being in scope.
pivot_df = pd.read_csv("synthetic_scenario_30_nodes.csv")
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)  # Fill missing values with each column's mean
pivot_df = pivot_df.head(10000)  # Restrict to the first 10000 rows (time steps)

# Parameters
reward = 0.5
theta = 0.5  # Threshold for reward condition
penalty = -0.5  # Penalty for polling when difference is <= theta
initial_value = 20  # Initial estimate for last polled values

# Set parameters
beta_1 = 0.8  # dEWMA parameter for state value
beta_2 = 0.01  # dEWMA parameter for rate of change

# Extract column names, ensuring "SN" is excluded
columns = [col for col in pivot_df.columns if col != "SN"]
num_nodes = len(columns)  # Number of sensor nodes based on dataset columns
num_time_steps = len(pivot_df)  # Total time steps based on dataset length

# Function to calculate Age of Incorrect Information (AoII) at the sink
def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    """Estimate the Age of Incorrect Information (AoII) seen at the sink.

    The estimate is the absolute value of the time elapsed since the last
    reception multiplied by the last known rate of change.  A NaN or
    infinite rate of change yields 0.0 instead.
    """
    elapsed = current_time - last_received_time
    rate_is_usable = not (np.isnan(last_rate_of_change) or np.isinf(last_rate_of_change))
    return abs(elapsed * last_rate_of_change) if rate_is_usable else 0.0

# Helper function to update state using dEWMA
def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    """Advance a node's (value, rate-of-change) estimate with a double EWMA.

    Returns the pair ``(state_value, rate_of_change)``.  When ``delta_t`` is
    zero the measurement is returned as the new value together with the
    previous rate, which avoids dividing by ``delta_t``.
    """
    if delta_t != 0:
        # Blend the measurement with the linear extrapolation of the old state.
        extrapolated = last_state_value + last_rate_of_change * delta_t
        new_value = beta_1 * measured_value + (1 - beta_1) * extrapolated
        # Blend the newly observed slope with the previous rate of change.
        new_rate = beta_2 * (new_value - last_state_value) / delta_t + (1 - beta_2) * last_rate_of_change
        return new_value, new_rate
    return measured_value, last_rate_of_change

# Function to extract numeric node ID from column names dynamically, ensuring valid extraction
def extract_node_id(col_name):
    """Return the integer formed by all digit characters in ``col_name``.

    The digits are concatenated in order of appearance.  ``None`` is
    returned when the name contains no digit at all.
    """
    digit_chars = [ch for ch in col_name if ch.isdigit()]
    if not digit_chars:
        return None
    return int(''.join(digit_chars))

# Dynamic Penalty Update algorithm based on Whittle indices
def dynamic_penalty_update(whittle_indices, M, current_lambda):
    """Return the penalty (lambda) to use after inspecting the Whittle indices.

    NaN indices are treated as negative infinity.  If at most ``M`` indices
    are strictly greater than ``current_lambda`` the penalty is returned
    unchanged; otherwise it becomes the ``M``-th largest index.
    """
    # Sanitise the indices so NaN entries can never count as eligible.
    c_values = [-float('inf') if np.isnan(value) else value
                for value in whittle_indices.values()]

    # Size of the set of nodes whose index strictly exceeds the current penalty.
    num_eligible = sum(1 for c_i in c_values if c_i > current_lambda)
    if num_eligible <= M:
        return current_lambda

    # More than M nodes are eligible: move the penalty to the M-th largest index.
    return sorted(c_values, reverse=True)[M - 1]

# Main function to simulate Whittle AoII with rewards and track transmission counts
def run_simulation_whittle_aoii_dynamic_penalty(pivot_df, columns, M, theta, penalty):
    """Simulate Whittle-index AoII polling with a dynamically updated penalty.

    At every time step a Whittle index is computed per node from its
    rate-based AoII estimate, the penalty (lambda) is updated through
    ``dynamic_penalty_update``, every node whose index is at least the
    updated penalty is polled, and an estimate is formed for each node
    (the measurement itself when polled, the linear dEWMA prediction
    otherwise).  Estimation errors are accumulated over all nodes and steps.

    Args:
        pivot_df: DataFrame of measurements, read with ``pivot_df.loc[t, col]``
            for ``t`` in ``range(len(pivot_df))``.
        columns: Names of the node columns to simulate.
        M: Polling budget forwarded to ``dynamic_penalty_update``.
        theta: Not used by this function; kept for interface compatibility.
        penalty: Not used by this function; kept for interface compatibility.

    Returns:
        dict with keys ``total_polls``, ``category_counts``, ``avg_mse``,
        ``avg_rmse`` and ``avg_mae``.  The three error metrics are NaN when
        there is nothing to average (no rows or no columns).
    """
    category_counts = {'Category A': 0, 'Category B': 0, 'Category C': 0}
    total_polls = 0  # Total number of polls across all nodes
    last_update_times = {col: 0 for col in columns}  # Last poll time per node
    state_node = {col: np.array([20.0, 0.1]) for col in columns}  # (value, rate) per node

    aoii_penalty = 0.0  # Dynamic penalty (lambda) starts at 0
    min_delta_t = 1  # Lower bound on elapsed time, avoids division by zero

    # Running error sums; these replace a second full pass over the DataFrame.
    sum_abs_error = 0.0
    sum_sq_error = 0.0
    num_samples = 0

    for t in range(len(pivot_df)):
        # Step 1: Whittle index per node, from the AoII estimate at the sink.
        whittle_indices = {}
        for col in columns:
            _, last_rate_of_change = state_node[col]
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            future_aoii_passive = calculate_aoii_sink(t + 1, last_update_times[col], last_rate_of_change)

            q_passive = current_aoii + future_aoii_passive
            q_active = current_aoii + aoii_penalty  # future AoII is 0 once polled
            whittle_index = q_passive - q_active

            if np.isnan(whittle_index) or np.isinf(whittle_index):
                whittle_indices[col] = -float('inf')  # invalid index is never polled
            else:
                whittle_indices[col] = whittle_index

        # Step 2: update the dynamic penalty (lambda).
        aoii_penalty = dynamic_penalty_update(whittle_indices, M, aoii_penalty)

        # Step 3: select nodes whose index reaches the updated penalty.
        # NOTE(review): because the comparison is ">=", nodes tied at the
        # penalty value are all selected, so more than M nodes can be polled.
        nodes_to_poll = [col for col in whittle_indices if whittle_indices[col] >= aoii_penalty]
        polled = set(nodes_to_poll)
        total_polls += len(nodes_to_poll)

        # Step 4: poll the selected nodes and update their dEWMA state.
        for col in nodes_to_poll:
            measured_value = pivot_df.loc[t, col]
            last_state_value, last_rate_of_change = state_node[col]
            delta_t_dynamic = max(min_delta_t, t - last_update_times[col])

            # beta_1 / beta_2 are read from module scope.
            state_node[col] = update_node_state_dewma(
                measured_value, last_state_value, last_rate_of_change, delta_t_dynamic, beta_1=beta_1, beta_2=beta_2
            )
            last_update_times[col] = t

            # Categorise the poll by the numeric id embedded in the column name.
            node_id = extract_node_id(col)
            if node_id is not None:
                if 1 <= node_id <= 10:
                    category_counts['Category A'] += 1
                elif 11 <= node_id <= 20:
                    category_counts['Category B'] += 1
                elif 21 <= node_id <= 30:
                    category_counts['Category C'] += 1

        # Step 5: form the estimate for every node and accumulate its error.
        for col in columns:
            actual_value = float(pivot_df.loc[t, col])
            if col in polled:
                # Polled nodes use the actual measurement.
                estimate = actual_value
            else:
                # Non-polled nodes use the linear prediction from the state model.
                last_state_value, last_rate_of_change = state_node[col]
                delta_t_since_last = max(min_delta_t, t - last_update_times[col])
                estimate = last_state_value + last_rate_of_change * delta_t_since_last

            error = actual_value - estimate
            sum_abs_error += abs(error)
            sum_sq_error += error * error
            num_samples += 1

    if num_samples:
        avg_mae = sum_abs_error / num_samples
        avg_mse = sum_sq_error / num_samples
        avg_rmse = float(np.sqrt(avg_mse))
    else:
        # Nothing was simulated, so the averages are undefined.
        avg_mae = avg_mse = avg_rmse = float('nan')

    return {
        'total_polls': total_polls,
        'category_counts': category_counts,
        'avg_mse': avg_mse,
        'avg_rmse': avg_rmse,
        'avg_mae': avg_mae
    }





In [1]:
import pandas as pd
import numpy as np

# Load dataset and preprocess
pivot_df = pd.read_csv("synthetic_scenario_30_nodes.csv")
# Replace missing values in each column with that column's mean
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Keep only the first 10000 rows (time steps)
pivot_df = pivot_df.head(10000)

# NOTE(review): `reward` and `initial_value` are not referenced by the code in
# this cell; `theta` and `penalty` mirror the values passed to the simulation call.
reward = 0.5
theta = 0.5
penalty = -0.5
initial_value = 20
# dEWMA smoothing weights handed to update_node_state_dewma:
# beta_1 weights the state value, beta_2 the rate of change.
beta_1 = 0.8
beta_2 = 0.01

# Every column except "SN" is treated as a node
columns = [col for col in pivot_df.columns if col != "SN"]
num_nodes = len(columns)  # Number of node columns
num_time_steps = len(pivot_df)  # Number of rows (time steps) retained

def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    """Return |elapsed time * rate of change| as the AoII estimate at the sink.

    A rate of change that is NaN or infinite produces 0.0.
    """
    gap = current_time - last_received_time
    # Guard clause: a non-finite rate cannot give a meaningful estimate.
    if not np.isfinite(last_rate_of_change):
        return 0.0
    return abs(gap * last_rate_of_change)

def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    """Double-EWMA update returning ``(state_value, rate_of_change)``.

    With ``delta_t == 0`` the measurement and the previous rate are returned
    as-is, so the division by ``delta_t`` is never reached.
    """
    if delta_t == 0:
        updated = (measured_value, last_rate_of_change)
    else:
        # Smoothed value: measurement mixed with the linear forecast.
        forecast = last_state_value + last_rate_of_change * delta_t
        value = beta_1 * measured_value + (1 - beta_1) * forecast
        # Smoothed rate: observed slope mixed with the previous rate.
        rate = beta_2 * (value - last_state_value) / delta_t + (1 - beta_2) * last_rate_of_change
        updated = (value, rate)
    return updated

def extract_node_id(col_name):
    """Concatenate the digit characters of ``col_name`` into an int.

    Returns ``None`` when ``col_name`` has no digit characters.
    """
    collected = ''
    for character in col_name:
        if character.isdigit():
            collected += character
    if collected:
        return int(collected)
    return None

def dynamic_penalty_update(whittle_indices, M, current_lambda):
    """Raise the penalty to the M-th largest index when too many nodes qualify.

    NaN indices count as negative infinity.  The current penalty is kept
    whenever no more than ``M`` indices are strictly above it.
    """
    ranked = []
    above_lambda = 0
    for index_value in whittle_indices.values():
        cleaned = -float('inf') if np.isnan(index_value) else index_value
        ranked.append(cleaned)
        if cleaned > current_lambda:
            above_lambda += 1

    if above_lambda > M:
        # Too many eligible nodes: the M-th largest index becomes the penalty.
        ranked.sort(reverse=True)
        return ranked[M - 1]
    return current_lambda

def run_simulation_with_aoii_tracking(pivot_df, columns, M, theta, penalty):
    """Run dynamic-penalty Whittle polling and report the average AoII.

    For every time step the AoII estimate of each node is added to that
    node's running total, Whittle indices are computed, the penalty is
    updated with ``dynamic_penalty_update`` and every node whose index is
    at least the updated penalty is polled (its dEWMA state is refreshed).

    Args:
        pivot_df: DataFrame of measurements, read with ``pivot_df.loc[t, col]``.
        columns: Names of the node columns to simulate.
        M: Polling budget forwarded to ``dynamic_penalty_update``.
        theta: Not used by this function; kept for interface compatibility.
        penalty: Not used by this function; kept for interface compatibility.

    Returns:
        ``(average_aoii_per_node, overall_average_aoii)``: a dict mapping each
        column to its time-averaged AoII, and the mean of those values over
        ``columns``.  Averages are NaN when there are no rows / no columns.
    """
    # Use the length of the frame that was passed in, not a module-level
    # global, so the average stays correct for any DataFrame.
    num_steps = len(pivot_df)
    last_update_times = {col: 0 for col in columns}
    state_node = {col: np.array([20.0, 0.1]) for col in columns}
    aoii_penalty = 0.0
    min_delta_t = 1  # Lower bound on elapsed time, avoids division by zero

    # Track AoII growth over time for each node
    cumulative_aoii_per_node = {col: 0.0 for col in columns}

    for t in range(num_steps):
        whittle_indices = {}
        for col in columns:
            _, last_rate_of_change = state_node[col]
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            future_aoii_passive = calculate_aoii_sink(t + 1, last_update_times[col], last_rate_of_change)

            # The AoII is measured before any poll at this step, so it can be
            # accumulated here instead of being recomputed in a second loop.
            cumulative_aoii_per_node[col] += current_aoii

            q_passive = current_aoii + future_aoii_passive
            q_active = current_aoii + 0 + aoii_penalty
            whittle_index = q_passive - q_active
            if np.isnan(whittle_index) or np.isinf(whittle_index):
                whittle_indices[col] = -float('inf')
            else:
                whittle_indices[col] = whittle_index

        aoii_penalty = dynamic_penalty_update(whittle_indices, M, aoii_penalty)

        for col in columns:
            if whittle_indices[col] < aoii_penalty:
                continue  # not selected for polling at this step
            last_state_value, last_rate_of_change = state_node[col]
            measured_value = pivot_df.loc[t, col]
            delta_t_dynamic = max(min_delta_t, t - last_update_times[col])
            # beta_1 / beta_2 are read from module scope.
            state_node[col] = update_node_state_dewma(
                measured_value, last_state_value, last_rate_of_change, delta_t_dynamic, beta_1, beta_2
            )
            last_update_times[col] = t

    nan = float('nan')
    average_aoii_per_node = {
        col: (cumulative_aoii_per_node[col] / num_steps if num_steps else nan) for col in columns
    }
    overall_average_aoii = sum(average_aoii_per_node.values()) / len(columns) if len(columns) else nan

    return average_aoii_per_node, overall_average_aoii

# Run the dynamic-penalty simulation with M=2 and print the AoII averaged over all nodes.
# theta and penalty are accepted by the function but not read in its body.
average_aoii_per_node, overall_average_aoii = run_simulation_with_aoii_tracking(pivot_df, columns, M=2, theta=0.5, penalty=-0.5)
print("Overall Average AoII:", overall_average_aoii)


Overall Average AoII: 2.8568904221083122


In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load dataset and preprocess
# Alternative dataset (disabled): "synthetic_scenario_30_nodes.csv"
#pivot_df = pd.read_csv("synthetic_scenario_30_nodes.csv")
pivot_df = pd.read_csv("simulated_office_environment.csv")
# Replace missing values in each column with that column's mean
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Keep only the first 10000 rows (time steps)
pivot_df = pivot_df.head(10000)

# NOTE(review): `initial_value` is not referenced by the code in this cell.
initial_value = 20
# dEWMA smoothing weights handed to update_node_state_dewma:
# beta_1 weights the state value, beta_2 the rate of change.
beta_1 = 0.9
beta_2 = 0.01
# Every column except "SN" is treated as a node
columns = [col for col in pivot_df.columns if col != "SN"]
num_nodes = len(columns)  # Number of node columns
num_time_steps = len(pivot_df)  # Number of rows (time steps) retained

def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    """Estimate the Age of Incorrect Information (AoII) at the sink.

    Args:
        current_time: Time at which the estimate is evaluated.
        last_received_time: Time of the last received update.
        last_rate_of_change: Last known rate of change of the node's value.

    Returns:
        ``abs((current_time - last_received_time) * last_rate_of_change)``,
        or 0.0 when the rate of change is NaN or infinite.
    """
    time_diff = current_time - last_received_time
    # Same guard as the other definitions of this helper in the notebook:
    # a non-finite rate would otherwise leak NaN/Inf into the Whittle indices
    # and the accumulated AoII.
    if not np.isfinite(last_rate_of_change):
        return 0.0
    return abs(time_diff * last_rate_of_change)

def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    """Apply one double-EWMA step and return ``(state_value, rate_of_change)``.

    A zero ``delta_t`` short-circuits to ``(measured_value,
    last_rate_of_change)`` because the rate update divides by ``delta_t``.
    """
    no_time_elapsed = delta_t == 0
    if no_time_elapsed:
        return measured_value, last_rate_of_change

    # Weight of the model prediction in the value update.
    carry_value_weight = 1 - beta_1
    smoothed_value = (
        beta_1 * measured_value
        + carry_value_weight * (last_state_value + last_rate_of_change * delta_t)
    )
    # Weight of the previous rate in the rate update.
    carry_rate_weight = 1 - beta_2
    smoothed_rate = (
        beta_2 * (smoothed_value - last_state_value) / delta_t
        + carry_rate_weight * last_rate_of_change
    )
    return smoothed_value, smoothed_rate

def run_simulation_whittle_aoii_with_global_penalty(pivot_df, columns, M, theta, penalty, global_aoii_penalty):
    """Run Whittle-index polling with a fixed penalty and report average AoII.

    At each time step every node's AoII estimate is accumulated and its
    Whittle index is computed with ``global_aoii_penalty`` subtracted.  Nodes
    with a non-negative index are candidates; if there are more than ``M``
    candidates only the ``M`` with the largest index are polled.

    Args:
        pivot_df: DataFrame of measurements, read with ``pivot_df.loc[t, col]``.
        columns: Names of the node columns to simulate.
        M: Maximum number of nodes polled per time step.
        theta: Not used by this function; kept for interface compatibility.
        penalty: Not used by this function; kept for interface compatibility.
        global_aoii_penalty: Constant penalty applied to the active action.

    Returns:
        ``(average_aoii_per_node, overall_average_aoii)``: a dict mapping each
        column to its time-averaged AoII, and the mean of those values over
        ``columns``.  Averages are NaN when there are no rows / no columns.
    """
    # Use the length of the frame that was passed in, not a module-level
    # global, so the average stays correct for any DataFrame.
    num_steps = len(pivot_df)
    last_update_times = {col: 0 for col in columns}
    state_node = {col: np.array([20.0, 0.1]) for col in columns}
    cumulative_aoii_per_node = {col: 0.0 for col in columns}
    min_delta_t = 1  # Lower bound on elapsed time, avoids division by zero

    for t in range(num_steps):
        whittle_indices = {}
        for col in columns:
            _, last_rate_of_change = state_node[col]
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            future_aoii_passive = calculate_aoii_sink(t + 1, last_update_times[col], last_rate_of_change)
            q_passive = current_aoii + future_aoii_passive
            q_active = current_aoii + 0 + global_aoii_penalty
            whittle_indices[col] = q_passive - q_active

            cumulative_aoii_per_node[col] += current_aoii

        # Candidates are nodes with a non-negative index; keep the top M.
        nodes_to_poll = [col for col in whittle_indices if whittle_indices[col] >= 0]
        if len(nodes_to_poll) > M:
            nodes_to_poll = sorted(nodes_to_poll, key=whittle_indices.get, reverse=True)[:M]

        for col in nodes_to_poll:
            measured_value = pivot_df.loc[t, col]
            last_state_value, last_rate_of_change = state_node[col]
            delta_t_dynamic = max(min_delta_t, t - last_update_times[col])
            # beta_1 / beta_2 are read from module scope.
            state_node[col] = update_node_state_dewma(
                measured_value, last_state_value, last_rate_of_change, delta_t_dynamic, beta_1, beta_2
            )
            last_update_times[col] = t

    nan = float('nan')
    average_aoii_per_node = {
        col: (cumulative_aoii_per_node[col] / num_steps if num_steps else nan) for col in columns
    }
    overall_average_aoii = sum(average_aoii_per_node.values()) / len(columns) if len(columns) else nan

    return average_aoii_per_node, overall_average_aoii

# M caps how many nodes the simulation polls per time step.
M = 1
# theta and penalty are passed through but not read in the function body.
theta = 0.5
penalty = -0.5
# Constant subtracted from every node's Whittle index inside the simulation.
global_aoii_penalty = 0

# Run the fixed-penalty Whittle simulation and print the AoII averaged over all nodes.
average_aoii_per_node, overall_average_aoii = run_simulation_whittle_aoii_with_global_penalty(
    pivot_df, columns, M, theta, penalty, global_aoii_penalty
)
print("Overall Average AoII (Whittle with Global Penalty):", overall_average_aoii)


Overall Average AoII (Whittle with Global Penalty): 0.5904328278486236


In [16]:
import pandas as pd
import numpy as np

# Load dataset and preprocess
# Alternative dataset (disabled): "synthetic_scenario_30_nodes.csv"
#pivot_df = pd.read_csv("synthetic_scenario_30_nodes.csv")
pivot_df = pd.read_csv("simulated_office_environment.csv")
# Replace missing values in each column with that column's mean
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Keep only the first 10000 rows (time steps)
pivot_df = pivot_df.head(10000)

# NOTE(review): `initial_value` is not referenced by the code in this cell.
initial_value = 20
# dEWMA smoothing weights handed to update_node_state_dewma:
# beta_1 weights the state value, beta_2 the rate of change.
beta_1 = 0.9
beta_2 = 0.01

# Every column except "SN" is treated as a node
columns = [col for col in pivot_df.columns if col != "SN"]
num_nodes = len(columns)  # Number of node columns
num_time_steps = len(pivot_df)  # Number of rows (time steps) retained

def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    """Compute the sink-side AoII estimate for one node.

    The result is ``abs(elapsed * rate)`` where ``elapsed`` is the time since
    the last reception; it is 0.0 if the rate is NaN or infinite.
    """
    elapsed_time = current_time - last_received_time
    rate_is_invalid = any(
        check(last_rate_of_change) for check in (np.isnan, np.isinf)
    )
    if rate_is_invalid:
        return 0.0
    return abs(elapsed_time * last_rate_of_change)

def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    """Return the dEWMA-updated ``(state_value, rate_of_change)`` pair.

    If ``delta_t`` is 0 the measured value is paired with the unchanged rate,
    since the rate formula would otherwise divide by zero.
    """
    if delta_t == 0:
        return measured_value, last_rate_of_change

    # Where the old state says the value should be after delta_t.
    predicted_value = last_state_value + last_rate_of_change * delta_t
    level = beta_1 * measured_value + (1 - beta_1) * predicted_value

    # Slope implied by the move from the old state to the new level.
    slope_term = beta_2 * (level - last_state_value) / delta_t
    trend = slope_term + (1 - beta_2) * last_rate_of_change
    return level, trend

def run_simulation_round_robin_aoii_tracking(pivot_df, columns):
    """Run round-robin polling (one node per step) and report average AoII.

    At time step ``t`` the node ``columns[t % len(columns)]`` is polled.  The
    AoII estimate of every node is accumulated before the polled node's
    dEWMA state is refreshed.

    Args:
        pivot_df: DataFrame of measurements, read with ``pivot_df.loc[t, col]``.
        columns: Names of the node columns, in polling order.

    Returns:
        ``(average_aoii_per_node, overall_average_aoii)``: a dict mapping each
        column to its time-averaged AoII, and the mean of those values over
        ``columns``.  Averages are NaN when there are no rows / no columns.
    """
    # Use the length of the frame that was passed in, not a module-level
    # global, so the average stays correct for any DataFrame.
    num_steps = len(pivot_df)
    num_columns = len(columns)
    last_update_times = {col: 0 for col in columns}
    state_node = {col: np.array([20.0, 0.1]) for col in columns}
    min_delta_t = 1  # Lower bound on elapsed time, avoids division by zero

    cumulative_aoii_per_node = {col: 0.0 for col in columns}

    # With no columns there is nothing to poll or accumulate.
    for t in range(num_steps if num_columns else 0):
        # The round-robin position advances by one per step, so it equals t.
        current_node = columns[t % num_columns]

        for col in columns:
            last_state_value, last_rate_of_change = state_node[col]
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            cumulative_aoii_per_node[col] += current_aoii

            if col == current_node:
                measured_value = pivot_df.loc[t, col]
                delta_t_dynamic = max(min_delta_t, t - last_update_times[col])
                # beta_1 / beta_2 are read from module scope.
                state_node[col] = update_node_state_dewma(
                    measured_value, last_state_value, last_rate_of_change, delta_t_dynamic, beta_1, beta_2
                )
                last_update_times[col] = t

    nan = float('nan')
    average_aoii_per_node = {
        col: (cumulative_aoii_per_node[col] / num_steps if num_steps else nan) for col in columns
    }
    overall_average_aoii = sum(average_aoii_per_node.values()) / num_columns if num_columns else nan

    return average_aoii_per_node, overall_average_aoii

# Run the round-robin simulation and print the AoII averaged over all nodes.
average_aoii_per_node, overall_average_aoii = run_simulation_round_robin_aoii_tracking(pivot_df, columns)
print("Overall Average AoII (Round Robin):", overall_average_aoii)


Overall Average AoII (Round Robin): 0.7376180618005805


In [20]:
import pandas as pd
import numpy as np

# Load dataset and preprocess
pivot_df = pd.read_csv("simulated_office_environment.csv")
# Replace missing values in each column with that column's mean
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Keep only the first 10000 rows (time steps)
pivot_df = pivot_df.head(10000)

# NOTE(review): `initial_value` is not referenced by the code in this cell.
initial_value = 20
# dEWMA smoothing weights handed to update_node_state_dewma:
# beta_1 weights the state value, beta_2 the rate of change.
beta_1 = 0.9
beta_2 = 0.01
# Every column except "SN" is treated as a node
columns = [col for col in pivot_df.columns if col != "SN"]
num_nodes = len(columns)  # Number of node columns
num_time_steps = len(pivot_df)  # Number of rows (time steps) retained

def calculate_aoii_sink(current_time, last_received_time, last_rate_of_change):
    """AoII estimate at the sink: |time since last reception * rate of change|.

    Falls back to 0.0 whenever the rate of change is NaN or infinite.
    """
    since_last_reception = current_time - last_received_time
    # De Morgan form of the validity test: the rate must be neither NaN nor Inf.
    if not np.isnan(last_rate_of_change) and not np.isinf(last_rate_of_change):
        return abs(since_last_reception * last_rate_of_change)
    return 0.0

def update_node_state_dewma(measured_value, last_state_value, last_rate_of_change, delta_t, beta_1, beta_2):
    """Compute the next ``(state_value, rate_of_change)`` using a double EWMA.

    ``delta_t == 0`` is handled separately (measurement plus unchanged rate)
    because the rate term divides by ``delta_t``.
    """
    if delta_t:
        # State value: measurement blended with the projected previous state.
        projected = last_state_value + last_rate_of_change * delta_t
        next_value = beta_1 * measured_value + (1 - beta_1) * projected
        # Rate of change: new slope blended with the previous rate.
        next_rate = beta_2 * (next_value - last_state_value) / delta_t + (1 - beta_2) * last_rate_of_change
    else:
        next_value, next_rate = measured_value, last_rate_of_change
    return next_value, next_rate

def run_simulation_aoi_based_tracking_with_limit(pivot_df, columns, aoi_threshold, M):
    """Run age-based polling (oldest nodes first) and report average AoII.

    At each time step the age of every node (time since it was last polled)
    is computed.  Nodes whose age is at least ``aoi_threshold`` are eligible
    and the ``M`` oldest of them are polled.  The AoII estimate of every node
    is accumulated before any poll at that step.

    Args:
        pivot_df: DataFrame of measurements, read with ``pivot_df.loc[t, col]``.
        columns: Names of the node columns to simulate.
        aoi_threshold: Minimum age for a node to be eligible for polling.
        M: Maximum number of nodes polled per time step.

    Returns:
        ``(average_aoii_per_node, overall_average_aoii)``: a dict mapping each
        column to its time-averaged AoII, and the mean of those values over
        ``columns``.  Averages are NaN when there are no rows / no columns.
    """
    # Use the length of the frame that was passed in, not a module-level
    # global, so the average stays correct for any DataFrame.
    num_steps = len(pivot_df)
    last_update_times = {col: 0 for col in columns}
    state_node = {col: np.array([20.0, 0.1]) for col in columns}
    cumulative_aoii_per_node = {col: 0.0 for col in columns}
    min_delta_t = 1  # Lower bound on elapsed time, avoids division by zero

    for t in range(num_steps):
        aoi_values = {col: t - last_update_times[col] for col in columns}

        for col in columns:
            _, last_rate_of_change = state_node[col]
            current_aoii = calculate_aoii_sink(t, last_update_times[col], last_rate_of_change)
            cumulative_aoii_per_node[col] += current_aoii

        # Select top M nodes with the highest AOI among those at or above the threshold
        eligible_nodes = [col for col, aoi in aoi_values.items() if aoi >= aoi_threshold]
        nodes_to_poll = sorted(eligible_nodes, key=aoi_values.get, reverse=True)[:M]

        for col in nodes_to_poll:
            measured_value = pivot_df.loc[t, col]
            last_state_value, last_rate_of_change = state_node[col]
            delta_t_dynamic = max(min_delta_t, t - last_update_times[col])
            # beta_1 / beta_2 are read from module scope.
            state_node[col] = update_node_state_dewma(
                measured_value, last_state_value, last_rate_of_change, delta_t_dynamic, beta_1, beta_2
            )
            last_update_times[col] = t

    nan = float('nan')
    average_aoii_per_node = {
        col: (cumulative_aoii_per_node[col] / num_steps if num_steps else nan) for col in columns
    }
    overall_average_aoii = sum(average_aoii_per_node.values()) / len(columns) if len(columns) else nan

    return average_aoii_per_node, overall_average_aoii

# With a threshold of 0 every node is eligible at every step, because a node's
# age (t minus its last update time) is never negative.
aoi_threshold = 0
# M caps how many nodes the simulation polls per time step.
M = 1
# Run the age-based simulation and print the AoII averaged over all nodes.
average_aoii_per_node, overall_average_aoii = run_simulation_aoi_based_tracking_with_limit(pivot_df, columns, aoi_threshold, M)
print("Overall Average AoII (AOI-based technique with M limit):", overall_average_aoii)



Overall Average AoII (AOI-based technique with M limit): 0.7219732013702596


In [24]:
import pandas as pd
import numpy as np

# Load dataset and preprocess
pivot_df = pd.read_csv("simulated_office_environment.csv")
# Replace missing values in each column with that column's mean
pivot_df = pivot_df.apply(lambda x: x.fillna(x.mean()), axis=0)
# Keep only the first 10000 rows (time steps)
pivot_df = pivot_df.head(10000)

# Exclude the "SN" column (when present) so that only sensor columns are
# filtered, matching the column selection of the other simulations in this
# notebook.  Without this, "SN" would be tracked and polled like a sensor.
sensor_columns = [col for col in pivot_df.columns if col != "SN"]
# Pull the measurements into one array up front; the loop below then avoids
# a DataFrame lookup for every polled sample.
measurements = pivot_df[sensor_columns].to_numpy(dtype=float)

# Define parameters
time_steps, num_sensors = measurements.shape
M = 1  # Number of sensors to poll at each time step

dt = 1  # Time step
A = np.array([[1, dt], [0, 1]])  # State transition matrix for the [value, rate] state
H = np.array([[1, 0]])  # Observation matrix: only the value is measured
Q = np.array([[1e-3, 0], [0, 1e-3]])  # Process noise covariance
R = np.array([[0.5]])  # Measurement noise covariance

# Initialise Kalman Filter states: value from the first row, rate of change 0
X = np.zeros((num_sensors, 2, 1))
X[:, 0, 0] = measurements[0]
X[:, 1, 0] = 0
P = np.array([np.eye(2) for _ in range(num_sensors)])

# Track last update times for AOI calculation
last_update_times = np.zeros(num_sensors)
cumulative_aoi = np.zeros(num_sensors)

print("Running Kalman Filter simulation to calculate average AOI...")
for t in range(1, time_steps):
    # Prediction step
    for i in range(num_sensors):
        X[i] = A @ X[i]
        P[i] = A @ P[i] @ A.T + Q

    # Select M sensors to poll based on largest AOI
    # (calculated as |rate of change| x time since last update)
    current_aoi = (t - last_update_times) * np.abs(X[:, 1, 0])
    polled_indices = np.argsort(current_aoi)[-M:]

    # Update step for polled sensors
    for i in polled_indices:
        Z = np.array([[measurements[t, i]]])
        y = Z - (H @ X[i])  # Innovation
        S = H @ P[i] @ H.T + R  # Innovation covariance
        K = P[i] @ H.T @ np.linalg.inv(S)  # Kalman gain
        X[i] = X[i] + K @ y
        P[i] = (np.eye(2) - K @ H) @ P[i]
        last_update_times[i] = t

    # Accumulate AOI for all sensors at each time step (values taken before the poll)
    cumulative_aoi += current_aoi

# Calculate average AOI per sensor and overall average AOI
average_aoi_per_sensor = cumulative_aoi / time_steps
overall_average_aoi = np.mean(average_aoi_per_sensor)

print("Overall Average AOI:", overall_average_aoi)







Running Kalman Filter simulation to calculate average AOI...
Overall Average AOI: 0.03224797293651896
