In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the node data table, fill each column's gaps with that column's mean
# (computed over the whole file), then keep only the first 5000 rows.
# Alternative dataset: 'top_fifty_motes_temperatures.csv'
pivot_df = (
    pd.read_csv('synthetic_scenario_two.csv')
    .apply(lambda column: column.fillna(column.mean()), axis=0)
    .head(5000)
)

# Scalar Kalman filter and polling settings
Q, R = 0.0001, 0.1  # Process / measurement noise covariance (scalars)
theta = 0.2         # Threshold for polling decision
L = 100             # Fairness constraint

# Kalman Filter-based simulation function (with one state parameter)
def run_simulation_kalman_with_pooling_distribution(
    num_nodes,
    num_nodes_to_poll,
    data=None,
    process_noise=None,
    measurement_noise=None,
    threshold=None,
    fairness_window=None,
):
    """Run the 1D Kalman-filter polling simulation and count updates per node.

    At every row of the readings table the nodes are ranked by their current
    (scalar) error covariance, the ``num_nodes_to_poll`` highest are polled,
    and any node whose last accepted update is more than ``fairness_window``
    steps old is polled as well. A polled node's filter is updated only when
    its reading differs from the current estimate by more than ``threshold``.

    Parameters
    ----------
    num_nodes : int
        Number of nodes; node ``i`` reads column ``f'mote{i}'`` (1-based).
    num_nodes_to_poll : int
        How many of the highest-covariance nodes are selected per step.
    data : pandas.DataFrame, optional
        Readings, one row per time step. Defaults to the notebook-level
        ``pivot_df``.
    process_noise : float, optional
        Process noise covariance. Defaults to the notebook-level ``Q``.
    measurement_noise : float, optional
        Measurement noise covariance. Defaults to the notebook-level ``R``.
    threshold : float, optional
        Minimum |estimate - reading| that triggers an update. Defaults to the
        notebook-level ``theta``.
    fairness_window : int, optional
        Maximum number of steps a node may go without an accepted update
        before it is force-polled. Defaults to the notebook-level ``L``.

    Returns
    -------
    numpy.ndarray
        Array of length ``num_nodes``; entry ``i - 1`` is the number of
        accepted updates for node ``i``. A poll whose reading stays within
        ``threshold`` of the estimate is not counted.
    """
    # Fall back to the notebook-level configuration only for omitted arguments,
    # so existing two-argument calls behave exactly as before.
    readings = pivot_df if data is None else data
    q = Q if process_noise is None else process_noise
    r = R if measurement_noise is None else measurement_noise
    update_threshold = theta if threshold is None else threshold
    max_gap = L if fairness_window is None else fairness_window

    mote_ids = range(1, num_nodes + 1)
    columns = {mote: f'mote{mote}' for mote in mote_ids}  # built once, not per step
    state_estimates = dict.fromkeys(mote_ids, 20.0)  # Initial state estimates
    covariances = dict.fromkeys(mote_ids, 0.1)       # Initial error covariances (scalars)
    last_update_times = dict.fromkeys(mote_ids, 0)   # Step of the last accepted update
    polling_count = np.zeros(num_nodes)

    # The time step is the row position, not the index label: this keeps the
    # fairness arithmetic valid for frames whose index is not 0..n-1.
    for current_time_step, (_, row) in enumerate(readings.iterrows()):
        # Highest covariance first; the stable sort keeps ties in node-id order.
        ranked = sorted(mote_ids, key=covariances.get, reverse=True)
        nodes_to_poll = set(ranked[:num_nodes_to_poll])

        # Fairness constraint: also poll nodes with no accepted update in the
        # last `max_gap` steps.
        # NOTE(review): the eviction candidate is the globally lowest-covariance
        # node, which is in the poll set only when num_nodes_to_poll >= num_nodes
        # or when this loop itself added it earlier. In practice overdue nodes
        # are therefore added on top of the budget rather than swapped in.
        # Behaviour is kept as-is; confirm whether a hard cap is intended.
        for mote in mote_ids:
            overdue = current_time_step - last_update_times[mote] > max_gap
            if overdue and mote not in nodes_to_poll:
                nodes_to_poll.discard(ranked[-1])
                nodes_to_poll.add(mote)

        # Process the selected nodes (each node's filter is independent).
        for mote in nodes_to_poll:
            column_name = columns[mote]
            if column_name not in row:
                continue  # no readings column for this node: skip it

            measured_value = row[column_name]

            # Prediction step: constant-state model, so the prediction is the
            # last estimate. Process noise is added once per poll; the time
            # elapsed since the last update is not factored in.
            predicted_state = state_estimates[mote]
            predicted_cov = covariances[mote] + q

            # Update only when the reading departs from the prediction enough.
            if abs(predicted_state - measured_value) > update_threshold:
                gain = predicted_cov / (predicted_cov + r)  # Kalman gain (scalar)
                state_estimates[mote] = predicted_state + gain * (measured_value - predicted_state)
                covariances[mote] = (1 - gain) * predicted_cov
                polling_count[mote - 1] += 1
                last_update_times[mote] = current_time_step

    return polling_count

# Simulation setup: total number of nodes and how many are selected per step
num_nodes, num_nodes_to_poll = 50, 5
polling_count = run_simulation_kalman_with_pooling_distribution(num_nodes, num_nodes_to_poll)

# Report the raw counts
print("Polling Count per Node:", polling_count)

# Bar chart of the per-node polling distribution
fig, ax = plt.subplots(figsize=(8, 2))
ax.bar(range(1, num_nodes + 1), polling_count, color='blue', alpha=0.7)
ax.set_xlabel('Node ID')
ax.set_ylabel('Polling Count')
ax.set_title('Polling Count based on Kalman Filter (1D)')
fig.tight_layout()
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'synthetic_scenario_two.csv'

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the node data table, fill each column's gaps with that column's mean
# (computed over the whole file), then keep only the first 5000 rows.
pivot_df = (
    pd.read_csv('top_fifty_motes_temperatures.csv')
    .apply(lambda column: column.fillna(column.mean()), axis=0)
    .head(5000)
)

# Scalar Kalman filter and polling settings
Q, R = 0.0001, 0.1  # Process / measurement noise covariance (scalars)
theta = 0.2         # Threshold for polling decision
L = 100             # Fairness constraint

# Kalman Filter-based simulation function (with one state parameter)
def run_simulation_kalman_with_pooling_distribution(
    num_nodes,
    num_nodes_to_poll,
    data=None,
    process_noise=None,
    measurement_noise=None,
    threshold=None,
    fairness_window=None,
):
    """Run the 1D Kalman-filter polling simulation and count updates per node.

    At every row of the readings table the nodes are ranked by their current
    (scalar) error covariance, the ``num_nodes_to_poll`` highest are polled,
    and any node whose last accepted update is more than ``fairness_window``
    steps old is polled as well. A polled node's filter is updated only when
    its reading differs from the current estimate by more than ``threshold``.

    Parameters
    ----------
    num_nodes : int
        Number of nodes; node ``i`` reads column ``f'mote{i}'`` (1-based).
    num_nodes_to_poll : int
        How many of the highest-covariance nodes are selected per step.
    data : pandas.DataFrame, optional
        Readings, one row per time step. Defaults to the notebook-level
        ``pivot_df``.
    process_noise : float, optional
        Process noise covariance. Defaults to the notebook-level ``Q``.
    measurement_noise : float, optional
        Measurement noise covariance. Defaults to the notebook-level ``R``.
    threshold : float, optional
        Minimum |estimate - reading| that triggers an update. Defaults to the
        notebook-level ``theta``.
    fairness_window : int, optional
        Maximum number of steps a node may go without an accepted update
        before it is force-polled. Defaults to the notebook-level ``L``.

    Returns
    -------
    numpy.ndarray
        Array of length ``num_nodes``; entry ``i - 1`` is the number of
        accepted updates for node ``i``. A poll whose reading stays within
        ``threshold`` of the estimate is not counted.
    """
    # Fall back to the notebook-level configuration only for omitted arguments,
    # so existing two-argument calls behave exactly as before.
    readings = pivot_df if data is None else data
    q = Q if process_noise is None else process_noise
    r = R if measurement_noise is None else measurement_noise
    update_threshold = theta if threshold is None else threshold
    max_gap = L if fairness_window is None else fairness_window

    mote_ids = range(1, num_nodes + 1)
    columns = {mote: f'mote{mote}' for mote in mote_ids}  # built once, not per step
    state_estimates = dict.fromkeys(mote_ids, 20.0)  # Initial state estimates
    covariances = dict.fromkeys(mote_ids, 0.1)       # Initial error covariances (scalars)
    last_update_times = dict.fromkeys(mote_ids, 0)   # Step of the last accepted update
    polling_count = np.zeros(num_nodes)

    # The time step is the row position, not the index label: this keeps the
    # fairness arithmetic valid for frames whose index is not 0..n-1.
    for current_time_step, (_, row) in enumerate(readings.iterrows()):
        # Highest covariance first; the stable sort keeps ties in node-id order.
        ranked = sorted(mote_ids, key=covariances.get, reverse=True)
        nodes_to_poll = set(ranked[:num_nodes_to_poll])

        # Fairness constraint: also poll nodes with no accepted update in the
        # last `max_gap` steps.
        # NOTE(review): the eviction candidate is the globally lowest-covariance
        # node, which is in the poll set only when num_nodes_to_poll >= num_nodes
        # or when this loop itself added it earlier. In practice overdue nodes
        # are therefore added on top of the budget rather than swapped in.
        # Behaviour is kept as-is; confirm whether a hard cap is intended.
        for mote in mote_ids:
            overdue = current_time_step - last_update_times[mote] > max_gap
            if overdue and mote not in nodes_to_poll:
                nodes_to_poll.discard(ranked[-1])
                nodes_to_poll.add(mote)

        # Process the selected nodes (each node's filter is independent).
        for mote in nodes_to_poll:
            column_name = columns[mote]
            if column_name not in row:
                continue  # no readings column for this node: skip it

            measured_value = row[column_name]

            # Prediction step: constant-state model, so the prediction is the
            # last estimate. Process noise is added once per poll; the time
            # elapsed since the last update is not factored in.
            predicted_state = state_estimates[mote]
            predicted_cov = covariances[mote] + q

            # Update only when the reading departs from the prediction enough.
            if abs(predicted_state - measured_value) > update_threshold:
                gain = predicted_cov / (predicted_cov + r)  # Kalman gain (scalar)
                state_estimates[mote] = predicted_state + gain * (measured_value - predicted_state)
                covariances[mote] = (1 - gain) * predicted_cov
                polling_count[mote - 1] += 1
                last_update_times[mote] = current_time_step

    return polling_count

# Simulation setup: total number of nodes and how many are selected per step
num_nodes, num_nodes_to_poll = 50, 5
polling_count = run_simulation_kalman_with_pooling_distribution(num_nodes, num_nodes_to_poll)

# Report the raw counts
print("Polling Count per Node:", polling_count)

# Bar chart of the per-node polling distribution
fig, ax = plt.subplots(figsize=(8, 2))
ax.bar(range(1, num_nodes + 1), polling_count, color='blue', alpha=0.7)
ax.set_xlabel('Node ID')
ax.set_ylabel('Polling Count')
ax.set_title('Polling Count based on Kalman Filter (1D)')
fig.tight_layout()
plt.show()
