In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

# Load the pivot DataFrame; run_simulation reads columns 'mote1'..'mote50'
# from each row and uses the row index as the time step.
pivot_df = pd.read_csv('top_fifty_motes_temperatures.csv')

# Parameters
A = np.array([[1, 1], [0, 1]])       # State transition matrix for a unit time step (the matrix encodes delta_t = 1)
H = np.array([[1, 0]])                # Measurement matrix: only the first state component is observed
Q = np.array([[1e-5, 0], [0, 1e-5]])  # Process noise covariance
R = np.array([[2]])                   # Measurement noise covariance
epsilon = 0.5                        # Sensitivity threshold for event detection (not referenced by the code in this cell)
theta = 0.5                         # A reading is transmitted when it differs from the prediction by more than this
num_nodes_to_poll_list = [1, 2, 5, 10, 20]  # Polling budgets to simulate, one run per entry
force_pull_threshold = 200             # Forcefully poll a node if it hasn't been polled in the last 200 time steps

# Energy parameters in Joules
E_max = 162000  # Battery capacity in Joules
E_t = 50 / 1000  # Transmission energy in Joules (charged per transmitted reading)
E_s = 10 / 1000  # Sensing energy in Joules (charged per poll)
E_w = 10 / 1000  # Wake-up energy in Joules (the lifetime formula charges 3 * E_w per poll)
E_0 = 1 / 1000   # Sleep energy in Joules (charged for the fraction of steps without a poll)
#mse_values = 0
#mse_count = 0
def run_simulation(num_nodes_to_poll):
    """Simulate covariance-ranked polling with one Kalman filter per mote.

    Every row of ``pivot_df`` is treated as one time step. On each step the
    motes to poll are the ``num_nodes_to_poll`` motes with the largest
    covariance trace, plus any mote whose last poll is at least
    ``force_pull_threshold`` steps old, minus the motes polled on the
    previous step. A polled reading is run through a Kalman update and is
    counted as transmitted when it differs from the filter's next-step
    prediction by more than ``theta``.

    Args:
        num_nodes_to_poll: How many top-ranked motes to select per step.

    Returns:
        ``(average_mse, average_lifetime_years)``: the mean squared
        difference between transmitted readings and their predictions
        (NaN when no reading was transmitted) and the mean estimated mote
        lifetime.
    """
    motes = [f'mote{i}' for i in range(1, 51)]
    state_estimates = {mote: np.array([[20], [0.01]]) for mote in motes}
    P = {mote: np.zeros((2, 2)) for mote in motes}
    last_update_times = dict.fromkeys(motes, 0)
    polled_count = dict.fromkeys(motes, 0)
    transmitted_count = dict.fromkeys(motes, 0)
    valuable_sensor_data = []
    previously_polled_nodes = set()

    def select_top_nodes(forced_nodes, excluded_nodes):
        # Rank by covariance trace (largest first), add the forced motes and
        # drop anything polled on the previous step.
        covariance_traces = {mote: np.trace(P[mote]) for mote in motes}
        ranked = sorted(covariance_traces, key=covariance_traces.get, reverse=True)
        return (set(ranked[:num_nodes_to_poll]) | forced_nodes) - excluded_nodes

    for idx, row in pivot_df.iterrows():
        current_time_step = idx

        # Motes that have gone too long without a poll are always included.
        forced_nodes = {
            mote
            for mote, last_time in last_update_times.items()
            if current_time_step - last_time >= force_pull_threshold
        }
        top_sensors = select_top_nodes(forced_nodes, previously_polled_nodes)

        for mote in top_sensors:
            polled_count[mote] += 1
            measured_value = row[mote]
            if pd.isna(measured_value):
                # A missing reading would turn the state estimate into NaN
                # for the rest of the run; count the poll but leave the
                # filter untouched.
                continue

            delta_t = max(current_time_step - last_update_times[mote], 1)

            # Time update: one step of the model defined by A and Q.
            xp = A @ state_estimates[mote]
            Pp = A @ P[mote] @ A.T + Q

            # Measurement update.
            z = np.array([[measured_value]])
            K = Pp @ H.T @ np.linalg.inv(H @ Pp @ H.T + R)
            x_hat = xp + K @ (z - H @ xp)
            state_estimates[mote] = x_hat

            # The covariance of a polled mote is reset to zero.
            # NOTE(review): P starts at zero, is only written here, and is
            # reset to zero, so every trace stays 0 and the ranking in
            # select_top_nodes reduces to dict order (mote1, mote2, ...).
            # Un-polled motes probably need a per-step covariance
            # propagation for the ranking to be meaningful - confirm intent.
            P[mote] = np.zeros_like(Pp)

            # Prediction for the next step from the updated state.
            predicted_measurement = (A @ x_hat)[0, 0]

            if abs(measured_value - predicted_measurement) > theta:
                transmitted_count[mote] += 1
                valuable_sensor_data.append({
                    'index': current_time_step,
                    'selected_moteid': mote,
                    'temperature': measured_value,
                    'predicted_temperature': predicted_measurement,
                    'time_elapsed': delta_t
                })

        for mote in top_sensors:
            last_update_times[mote] = current_time_step
        previously_polled_nodes = top_sensors

    if valuable_sensor_data:
        valuable_sensor_df = pd.DataFrame(valuable_sensor_data)
        valuable_sensor_df['squared_error'] = (
            valuable_sensor_df['temperature'] - valuable_sensor_df['predicted_temperature']
        ) ** 2
        average_mse = valuable_sensor_df['squared_error'].mean()
    else:
        # An empty DataFrame has no columns, so the column lookups above
        # would raise KeyError; report "no data" as NaN instead.
        average_mse = float('nan')

    # Per-step polling (fw) and transmission (ft) frequencies drive the
    # per-step energy draw of each mote.
    time_steps = len(pivot_df)
    lifetimes = []
    for mote in motes:
        fw = polled_count[mote] / time_steps
        ft = transmitted_count[mote] / time_steps
        lifetimes.append(E_max / (ft * E_t + fw * (E_s + 3 * E_w) + (1 - fw) * E_0))

    # NOTE(review): E_max / energy-per-step is a number of time steps; the
    # division by 3600 treats one step as one second - confirm the sampling
    # interval of the data.
    average_lifetime_hours = np.mean(lifetimes) / 3600
    average_lifetime_years = average_lifetime_hours / 8760

    return average_mse, average_lifetime_years

# Sweep the polling budgets and keep both metrics for each one.
results = {}

for num_nodes_to_poll in num_nodes_to_poll_list:
    print(f"Running simulation for {num_nodes_to_poll} nodes to poll...")
    mse, lifetime = run_simulation(num_nodes_to_poll)
    results[num_nodes_to_poll] = {'MSE': mse, 'Lifetime (years)': lifetime}
    print(f"Completed simulation for {num_nodes_to_poll} nodes to poll.")

# Report the metrics in the order the budgets were simulated.
for num_nodes_to_poll in results:
    metrics = results[num_nodes_to_poll]
    print(f"Nodes to poll: {num_nodes_to_poll}")
    print(f"  Average MSE: {metrics['MSE']}")
    print(f"  Average sensor lifetime (years): {metrics['Lifetime (years)']}")


Running simulation for 1 nodes to poll...
Completed simulation for 1 nodes to poll.
Running simulation for 2 nodes to poll...
Completed simulation for 2 nodes to poll.
Running simulation for 5 nodes to poll...
Completed simulation for 5 nodes to poll.
Running simulation for 10 nodes to poll...
Completed simulation for 10 nodes to poll.
Running simulation for 20 nodes to poll...
Completed simulation for 20 nodes to poll.
Running simulation for 50 nodes to poll...
Completed simulation for 50 nodes to poll.
Nodes to poll: 1
  Average MSE: 530.5684612765771
  Average sensor lifetime (years): 3.588273976708198
Nodes to poll: 2
  Average MSE: 628.5116272093883
  Average sensor lifetime (years): 3.5172170659586093
Nodes to poll: 5
  Average MSE: 706.2047928788825
  Average sensor lifetime (years): 3.3043085536884336
Nodes to poll: 10
  Average MSE: 744.9098229155848
  Average sensor lifetime (years): 2.9489516433490275
Nodes to poll: 20
  Average MSE: 759.354603372447
  Average sensor lifetim

# DEWMA for one step future prediction 

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the pivot DataFrame and keep only the first 10000 rows (time steps)
pivot_df = pd.read_csv('top_fifty_motes_temperatures.csv')
pivot_df = pivot_df.head(10000)

# Initialize parameters
alpha = 0.04  # Weight of the new measurement in the state update
beta = 0.1  # Weight of the newly observed slope in the rate-of-change update
initial_state = 20.0  # Initial temperature estimate
initial_rate = 0.01   # Initial rate of change estimate
theta = 0.2  # Threshold for significant change
num_nodes_to_poll_list = [1, 2, 5, 10, 20]  # Polling budgets to simulate, one run per entry
max_value = 40  # Estimates and predictions are clipped to [-max_value, max_value]

# Force pull threshold
force_pull_threshold = 200  # Forcefully poll a node if it hasn't been polled in the last 200 time steps

# Energy parameters in Joules
E_max = 162000  # Battery capacity in Joules
E_t = 50 / 1000  # Transmission energy in Joules (charged per transmitted reading)
E_s = 10 / 1000  # Sensing energy in Joules (charged per poll)
E_w = 10 / 1000  # Wake-up energy in Joules (the lifetime formula charges 3 * E_w per poll)
E_0 = 1 / 1000   # Sleep energy in Joules (charged for the fraction of steps without a poll)

def run_simulation(num_nodes_to_poll):
    """Simulate DEWMA-based polling with forced refresh of stale motes.

    Every row of ``pivot_df`` is one time step. Motes are ranked by how far
    their extrapolated value has drifted from the last transmitted reading;
    the top ``num_nodes_to_poll`` are polled together with every mote whose
    last poll is at least ``force_pull_threshold`` steps old. Each polled
    reading updates the mote's level and rate estimates, and is counted as
    transmitted when it differs from the re-extrapolated estimate by more
    than ``theta``.

    Args:
        num_nodes_to_poll: How many top-ranked motes to poll per step, in
            addition to the forced ones.

    Returns:
        ``(average_mse, average_lifetime_years)``: the mean squared
        difference between transmitted readings and their predictions
        (NaN when no reading was transmitted) and the mean estimated mote
        lifetime.
    """
    motes = [f'mote{i}' for i in range(1, 51)]
    state_estimates = dict.fromkeys(motes, initial_state)
    rate_of_change = dict.fromkeys(motes, initial_rate)
    last_sensed_values = dict.fromkeys(motes, initial_state)
    last_update_times = dict.fromkeys(motes, 0)
    polled_count = dict.fromkeys(motes, 0)
    transmitted_count = dict.fromkeys(motes, 0)
    valuable_sensor_data = []

    def predict_node_state(x_hat, x_rate, delta_t):
        # Linear extrapolation, clipped to the allowed value range.
        return np.clip(x_hat + x_rate * delta_t, -max_value, max_value)

    def select_top_nodes(delta_t, excluded_nodes):
        # Largest predicted drift from the last transmitted value first;
        # sorted() is stable, so ties keep mote order.
        value_changes = {
            mote: abs(
                predict_node_state(state_estimates[mote], rate_of_change[mote], delta_t)
                - last_sensed_values[mote]
            )
            for mote in motes
        }
        ranked = sorted(value_changes, key=value_changes.get, reverse=True)
        return [mote for mote in ranked if mote not in excluded_nodes][:num_nodes_to_poll]

    for idx, row in pivot_df.iterrows():
        current_time_step = idx
        # Extrapolation horizon used for ranking: time since the stalest
        # mote was last updated.
        delta_t_global = current_time_step - min(last_update_times.values())

        forced_nodes = {
            mote
            for mote, last_time in last_update_times.items()
            if current_time_step - last_time >= force_pull_threshold
        }
        top_sensors = list(forced_nodes) + select_top_nodes(delta_t_global, forced_nodes)

        for mote in top_sensors:
            polled_count[mote] += 1
            measured_value = row[mote]
            if pd.isna(measured_value):
                # A missing reading would turn the estimates into NaN for
                # the rest of the run (and NaN keys break the ranking sort);
                # count the poll but leave the estimates untouched.
                continue

            previous_state = state_estimates[mote]
            previous_rate = rate_of_change[mote]
            # Clamped to at least 1, so the division below is always safe.
            delta_t = max(current_time_step - last_update_times[mote], 1)

            new_state = np.clip(
                alpha * measured_value + (1 - alpha) * (previous_state + previous_rate * delta_t),
                -max_value,
                max_value,
            )
            new_rate = np.clip(
                beta * (new_state - previous_state) / delta_t + (1 - beta) * previous_rate,
                -max_value,
                max_value,
            )
            state_estimates[mote] = new_state
            rate_of_change[mote] = new_rate

            predicted_measurement = predict_node_state(new_state, new_rate, delta_t)

            if abs(measured_value - predicted_measurement) > theta:
                transmitted_count[mote] += 1
                last_sensed_values[mote] = measured_value
                valuable_sensor_data.append({
                    'index': current_time_step,
                    'selected_moteid': mote,
                    'temperature': measured_value,
                    'predicted_temperature': predicted_measurement,
                    'time_elapsed': delta_t
                })

        for mote in top_sensors:
            last_update_times[mote] = current_time_step

    if valuable_sensor_data:
        valuable_sensor_df = pd.DataFrame(valuable_sensor_data)
        valuable_sensor_df['squared_error'] = (
            valuable_sensor_df['temperature'] - valuable_sensor_df['predicted_temperature']
        ) ** 2
        average_mse = valuable_sensor_df['squared_error'].mean()
    else:
        # An empty DataFrame has no columns, so the column lookups above
        # would raise KeyError; report "no data" as NaN instead.
        average_mse = float('nan')

    # Per-step polling (fw) and transmission (ft) frequencies drive the
    # per-step energy draw of each mote.
    time_steps = len(pivot_df)
    lifetimes = []
    for mote in motes:
        fw = polled_count[mote] / time_steps
        ft = transmitted_count[mote] / time_steps
        lifetimes.append(E_max / (ft * E_t + fw * (E_s + 3 * E_w) + (1 - fw) * E_0))

    # NOTE(review): E_max / energy-per-step is a number of time steps; the
    # division by 3600 treats one step as one second - confirm the sampling
    # interval of the data.
    average_lifetime_hours = np.mean(lifetimes) / 3600
    average_lifetime_years = average_lifetime_hours / 8760

    return average_mse, average_lifetime_years

# Sweep the polling budgets and keep both metrics for each one.
results = {}

for num_nodes_to_poll in num_nodes_to_poll_list:
    print(f"Running simulation for {num_nodes_to_poll} nodes to poll...")
    mse, lifetime = run_simulation(num_nodes_to_poll)
    results[num_nodes_to_poll] = {'MSE': mse, 'Lifetime (years)': lifetime}
    print(f"Completed simulation for {num_nodes_to_poll} nodes to poll.")

# Report the metrics in the order the budgets were simulated.
for num_nodes_to_poll in results:
    metrics = results[num_nodes_to_poll]
    print(f"Nodes to poll: {num_nodes_to_poll}")
    print(f"  Average MSE: {metrics['MSE']}")
    print(f"  Average sensor lifetime (years): {metrics['Lifetime (years)']}")


Running simulation for 1 nodes to poll...
Completed simulation for 1 nodes to poll.
Running simulation for 2 nodes to poll...
Completed simulation for 2 nodes to poll.
Running simulation for 5 nodes to poll...
Completed simulation for 5 nodes to poll.
Running simulation for 10 nodes to poll...
Completed simulation for 10 nodes to poll.
Running simulation for 20 nodes to poll...
Completed simulation for 20 nodes to poll.
Nodes to poll: 1
  Average MSE: 169.56572969620038
  Average sensor lifetime (years): 2.9015970421538655
Nodes to poll: 2
  Average MSE: 10.603275167715244
  Average sensor lifetime (years): 1.5726154728002164
Nodes to poll: 5
  Average MSE: 6.521228760298459
  Average sensor lifetime (years): 0.7615553546065824
Nodes to poll: 10
  Average MSE: 5.247754085306816
  Average sensor lifetime (years): 0.4262861804818935
Nodes to poll: 20
  Average MSE: 3.365237220130791
  Average sensor lifetime (years): 0.20127754057768368


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the pivot DataFrame and keep only the first 10000 rows (time steps)
pivot_df = pd.read_csv('top_fifty_motes_temperatures.csv')
pivot_df = pivot_df.head(10000)

# Initialize parameters
alpha = 0.04  # Weight of the new measurement in the state update
beta = 0.1  # Weight of the newly observed slope in the rate-of-change update
initial_state = 20.0  # Initial temperature estimate
initial_rate = 0.01   # Initial rate of change estimate
theta = 0.5  # A mote is polled only when its predicted drift exceeds this
num_nodes_to_poll_list = [1, 2, 5, 10, 20]  # Polling budgets to simulate, one run per entry
max_value = 40  # Estimates and predictions are clipped to [-max_value, max_value]

# Force pull threshold
force_pull_threshold = 200  # Staleness limit of 200 time steps; not referenced by run_simulation in this cell

# Energy parameters in Joules
E_max = 162000  # Battery capacity in Joules
E_t = 50 / 1000  # Transmission energy in Joules (charged per transmitted reading)
E_s = 10 / 1000  # Sensing energy in Joules (charged per poll)
E_w = 10 / 1000  # Wake-up energy in Joules (the lifetime formula charges 3 * E_w per poll)
E_0 = 1 / 1000   # Sleep energy in Joules (charged for the fraction of steps without a poll)

def run_simulation(num_nodes_to_poll):
    """Simulate DEWMA-based polling gated by predicted drift.

    Every row of ``pivot_df`` is one time step. Motes are ranked by how far
    their estimate is predicted to have drifted since their own last
    update; of the top ``num_nodes_to_poll``, only those whose drift
    exceeds ``theta`` are polled. Every polled reading is transmitted and
    scored against the prediction made from the pre-update estimate.

    Args:
        num_nodes_to_poll: Maximum number of motes to poll per step.

    Returns:
        ``(average_mse, average_lifetime_years)``: the mean squared
        prediction error over all valid polled readings (NaN when there
        are none) and the mean estimated mote lifetime.
    """
    motes = [f'mote{i}' for i in range(1, 51)]
    state_estimates = dict.fromkeys(motes, initial_state)
    rate_of_change = dict.fromkeys(motes, initial_rate)
    last_update_times = dict.fromkeys(motes, 0)
    polled_count = dict.fromkeys(motes, 0)
    transmitted_count = dict.fromkeys(motes, 0)
    valuable_sensor_data = []

    def predict_node_state(x_hat, x_rate, delta_t):
        # Linear extrapolation, clipped to the allowed value range.
        return np.clip(x_hat + x_rate * delta_t, -max_value, max_value)

    def select_nodes_to_poll(current_time_step):
        # Predicted drift of each mote since its own last update; sorted()
        # is stable, so ties keep mote order.
        value_changes = {
            mote: abs(
                predict_node_state(
                    state_estimates[mote],
                    rate_of_change[mote],
                    current_time_step - last_update_times[mote],
                )
                - state_estimates[mote]
            )
            for mote in motes
        }
        ranked = sorted(value_changes, key=value_changes.get, reverse=True)
        return [mote for mote in ranked[:num_nodes_to_poll] if value_changes[mote] > theta]

    for idx, row in pivot_df.iterrows():
        current_time_step = idx
        nodes_to_poll = select_nodes_to_poll(current_time_step)

        for mote in nodes_to_poll:
            # In this variant every polled reading is also transmitted.
            polled_count[mote] += 1
            transmitted_count[mote] += 1

            measured_value = row[mote]
            if pd.isna(measured_value):
                # A missing reading would turn the estimates into NaN for
                # the rest of the run, and a NaN drift then sorts to the top
                # of the ranking and blocks every other mote. Keep the
                # energy accounting but skip the update and the error
                # record (pandas' mean would ignore the NaN row anyway).
                continue

            previous_state = state_estimates[mote]
            previous_rate = rate_of_change[mote]
            # Clamped to at least 1, so the division below is always safe.
            delta_t = max(current_time_step - last_update_times[mote], 1)

            # Update state estimate based on new measurement.
            new_state = np.clip(
                alpha * measured_value + (1 - alpha) * (previous_state + previous_rate * delta_t),
                -max_value,
                max_value,
            )
            state_estimates[mote] = new_state
            rate_of_change[mote] = np.clip(
                beta * (new_state - previous_state) / delta_t + (1 - beta) * previous_rate,
                -max_value,
                max_value,
            )

            # Score the prediction that was available before this reading.
            predicted_measurement = predict_node_state(previous_state, previous_rate, delta_t)
            error = abs(measured_value - predicted_measurement)

            valuable_sensor_data.append({
                'index': current_time_step,
                'selected_moteid': mote,
                'temperature': measured_value,
                'predicted_temperature': predicted_measurement,
                'time_elapsed': delta_t,
                'error': error
            })

        for mote in nodes_to_poll:
            last_update_times[mote] = current_time_step

    if valuable_sensor_data:
        valuable_sensor_df = pd.DataFrame(valuable_sensor_data)
        valuable_sensor_df['squared_error'] = valuable_sensor_df['error'] ** 2
        average_mse = valuable_sensor_df['squared_error'].mean()
    else:
        # An empty DataFrame has no 'error' column, so the lookup above
        # would raise KeyError; report "no data" as NaN instead.
        average_mse = float('nan')

    # Per-step polling (fw) and transmission (ft) frequencies drive the
    # per-step energy draw of each mote.
    time_steps = len(pivot_df)
    lifetimes = []
    for mote in motes:
        fw = polled_count[mote] / time_steps
        ft = transmitted_count[mote] / time_steps
        lifetimes.append(E_max / (ft * E_t + fw * (E_s + 3 * E_w) + (1 - fw) * E_0))

    # NOTE(review): E_max / energy-per-step is a number of time steps; the
    # division by 3600 treats one step as one second - confirm the sampling
    # interval of the data.
    average_lifetime_hours = np.mean(lifetimes) / 3600
    average_lifetime_years = average_lifetime_hours / 8760

    return average_mse, average_lifetime_years

# Sweep the polling budgets and keep both metrics for each one.
results = {}

for num_nodes_to_poll in num_nodes_to_poll_list:
    print(f"Running simulation for {num_nodes_to_poll} nodes to poll...")
    mse, lifetime = run_simulation(num_nodes_to_poll)
    results[num_nodes_to_poll] = {'MSE': mse, 'Lifetime (years)': lifetime}
    print(f"Completed simulation for {num_nodes_to_poll} nodes to poll.")

# Report the metrics in the order the budgets were simulated.
for num_nodes_to_poll in results:
    metrics = results[num_nodes_to_poll]
    print(f"Nodes to poll: {num_nodes_to_poll}")
    print(f"  Average MSE: {metrics['MSE']}")
    print(f"  Average sensor lifetime (years): {metrics['Lifetime (years)']}")


Running simulation for 1 nodes to poll...
Completed simulation for 1 nodes to poll.
Running simulation for 2 nodes to poll...
Completed simulation for 2 nodes to poll.
Running simulation for 5 nodes to poll...
Completed simulation for 5 nodes to poll.
Running simulation for 10 nodes to poll...
Completed simulation for 10 nodes to poll.
Running simulation for 20 nodes to poll...
Completed simulation for 20 nodes to poll.
Nodes to poll: 1
  Average MSE: 36.75545825655909
  Average sensor lifetime (years): 4.1701116243480465
Nodes to poll: 2
  Average MSE: 62.87172913475819
  Average sensor lifetime (years): 3.6219757380046333
Nodes to poll: 5
  Average MSE: 62.01394651854834
  Average sensor lifetime (years): 3.6168944921499486
Nodes to poll: 10
  Average MSE: 62.42317290681951
  Average sensor lifetime (years): 3.61551119879931
Nodes to poll: 20
  Average MSE: 62.408403681262165
  Average sensor lifetime (years): 3.6173411473571426


: 

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the pivot DataFrame and keep only the first 10000 rows (time steps)
pivot_df = pd.read_csv('top_fifty_motes_temperatures.csv')
pivot_df = pivot_df.head(10000)

# Initialize parameters
alpha = 0.2  # Weight of the new measurement in the state update
beta = 0.2  # Weight of the newly observed slope in the rate-of-change update
initial_state = 20.0  # Initial temperature estimate
initial_rate = 0.01   # Initial rate of change estimate
theta = 0.5  # A mote is polled only when its predicted drift exceeds this
num_nodes_to_poll_list = [1, 2, 5, 10, 20]  # Polling budgets to simulate, one run per entry
max_value = 40  # State estimates and predictions are clipped to [-max_value, max_value]

# Force pull threshold
force_pull_threshold = 200  # Staleness limit of 200 time steps; not referenced by run_simulation in this cell

# Energy parameters in Joules
E_max = 162000  # Battery capacity in Joules
E_t = 50 / 1000  # Transmission energy in Joules (charged per transmitted reading)
E_s = 10 / 1000  # Sensing energy in Joules (charged per poll)
E_w = 10 / 1000  # Wake-up energy in Joules (the lifetime formula charges 3 * E_w per poll)
E_0 = 1 / 1000   # Sleep energy in Joules (charged for the fraction of steps without a poll)

def run_simulation(num_nodes_to_poll):
    """Simulate drift-gated DEWMA polling without back-to-back polls.

    Every row of ``pivot_df`` is one time step. Motes are ranked by how far
    their estimate is predicted to have drifted since their own last
    update; motes polled on the previous step are skipped, and of the
    remaining top ``num_nodes_to_poll`` only those whose drift exceeds
    ``theta`` are polled. Every polled reading is transmitted and scored
    against the prediction made from the pre-update estimate.

    Args:
        num_nodes_to_poll: Maximum number of motes to poll per step.

    Returns:
        ``(average_mse, average_lifetime_years)``: the mean squared
        prediction error over all valid polled readings (NaN when there
        are none) and the mean estimated mote lifetime.
    """
    motes = [f'mote{i}' for i in range(1, 51)]
    state_estimates = dict.fromkeys(motes, initial_state)
    rate_of_change = dict.fromkeys(motes, initial_rate)
    last_update_times = dict.fromkeys(motes, 0)
    polled_count = dict.fromkeys(motes, 0)
    transmitted_count = dict.fromkeys(motes, 0)
    valuable_sensor_data = []
    previously_polled_nodes = set()

    def predict_node_state(x_hat, x_rate, delta_t):
        # Linear extrapolation, clipped to the allowed value range.
        return np.clip(x_hat + x_rate * delta_t, -max_value, max_value)

    def select_nodes_to_poll(current_time_step, excluded_nodes):
        # Predicted drift of each mote since its own last update; sorted()
        # is stable, so ties keep mote order.
        value_changes = {
            mote: abs(
                predict_node_state(
                    state_estimates[mote],
                    rate_of_change[mote],
                    current_time_step - last_update_times[mote],
                )
                - state_estimates[mote]
            )
            for mote in motes
        }
        ranked = sorted(value_changes, key=value_changes.get, reverse=True)
        candidates = [mote for mote in ranked if mote not in excluded_nodes][:num_nodes_to_poll]
        return [mote for mote in candidates if value_changes[mote] > theta]

    for idx, row in pivot_df.iterrows():
        current_time_step = idx
        nodes_to_poll = select_nodes_to_poll(current_time_step, previously_polled_nodes)

        for mote in nodes_to_poll:
            # In this variant every polled reading is also transmitted.
            polled_count[mote] += 1
            transmitted_count[mote] += 1

            measured_value = row[mote]
            if pd.isna(measured_value):
                # A missing reading would turn the estimates into NaN for
                # the rest of the run and corrupt the ranking sort. Keep the
                # energy accounting but skip the update and the error
                # record (pandas' mean would ignore the NaN row anyway).
                continue

            previous_state = state_estimates[mote]
            previous_rate = rate_of_change[mote]
            # Clamped to at least 1, so the division below is always safe.
            delta_t = max(current_time_step - last_update_times[mote], 1)

            # Update state estimate based on new measurement.
            new_state = np.clip(
                alpha * measured_value + (1 - alpha) * (previous_state + previous_rate * delta_t),
                -max_value,
                max_value,
            )
            state_estimates[mote] = new_state
            # The rate estimate is deliberately left unclipped in this variant.
            rate_of_change[mote] = (
                beta * (new_state - previous_state) / delta_t + (1 - beta) * previous_rate
            )

            # Score the prediction that was available before this reading.
            predicted_measurement = predict_node_state(previous_state, previous_rate, delta_t)
            error = abs(measured_value - predicted_measurement)

            valuable_sensor_data.append({
                'index': current_time_step,
                'selected_moteid': mote,
                'temperature': measured_value,
                'predicted_temperature': predicted_measurement,
                'time_elapsed': delta_t,
                'error': error
            })

        for mote in nodes_to_poll:
            last_update_times[mote] = current_time_step
        previously_polled_nodes = set(nodes_to_poll)

    if valuable_sensor_data:
        valuable_sensor_df = pd.DataFrame(valuable_sensor_data)
        valuable_sensor_df['squared_error'] = valuable_sensor_df['error'] ** 2
        average_mse = valuable_sensor_df['squared_error'].mean()
    else:
        # An empty DataFrame has no 'error' column, so the lookup above
        # would raise KeyError; report "no data" as NaN instead.
        average_mse = float('nan')

    # Per-step polling (fw) and transmission (ft) frequencies drive the
    # per-step energy draw of each mote.
    time_steps = len(pivot_df)
    lifetimes = []
    for mote in motes:
        fw = polled_count[mote] / time_steps
        ft = transmitted_count[mote] / time_steps
        lifetimes.append(E_max / (ft * E_t + fw * (E_s + 3 * E_w) + (1 - fw) * E_0))

    # NOTE(review): E_max / energy-per-step is a number of time steps; the
    # division by 3600 treats one step as one second - confirm the sampling
    # interval of the data.
    average_lifetime_hours = np.mean(lifetimes) / 3600
    average_lifetime_years = average_lifetime_hours / 8760

    return average_mse, average_lifetime_years

# Sweep the polling budgets and keep both metrics for each one.
results = {}

for num_nodes_to_poll in num_nodes_to_poll_list:
    print(f"Running simulation for {num_nodes_to_poll} nodes to poll...")
    mse, lifetime = run_simulation(num_nodes_to_poll)
    results[num_nodes_to_poll] = {'MSE': mse, 'Lifetime (years)': lifetime}
    print(f"Completed simulation for {num_nodes_to_poll} nodes to poll.")

# Report the metrics in the order the budgets were simulated.
for num_nodes_to_poll in results:
    metrics = results[num_nodes_to_poll]
    print(f"Nodes to poll: {num_nodes_to_poll}")
    print(f"  Average MSE: {metrics['MSE']}")
    print(f"  Average sensor lifetime (years): {metrics['Lifetime (years)']}")


Running simulation for 1 nodes to poll...
Completed simulation for 1 nodes to poll.
Running simulation for 2 nodes to poll...
Completed simulation for 2 nodes to poll.
Running simulation for 5 nodes to poll...
Completed simulation for 5 nodes to poll.
Running simulation for 10 nodes to poll...
Completed simulation for 10 nodes to poll.
Running simulation for 20 nodes to poll...
Completed simulation for 20 nodes to poll.
Nodes to poll: 1
  Average MSE: 11.147858275689558
  Average sensor lifetime (years): 4.564162395218127
Nodes to poll: 2
  Average MSE: 12.265623976513933
  Average sensor lifetime (years): 4.3733615946733995
Nodes to poll: 5
  Average MSE: 12.223899309818377
  Average sensor lifetime (years): 4.377980436224297
Nodes to poll: 10
  Average MSE: 12.342890891475198
  Average sensor lifetime (years): 4.375220438447269
Nodes to poll: 20
  Average MSE: 12.3592806390413
  Average sensor lifetime (years): 4.376748759876009


# UCB-based polling technique for node selection

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the pivot DataFrame and keep only the first 10000 rows (time steps)
pivot_df = pd.read_csv('top_fifty_motes_temperatures.csv')
pivot_df = pivot_df.head(10000)

# Initialize parameters
alpha = 0.04  # Weight of the new measurement in the state update
beta = 0.1  # Weight of the newly observed slope in the rate-of-change update
initial_state = 20.0  # Initial temperature estimate
initial_rate = 0.01   # Initial rate of change estimate (also the initial value of every bandit arm)
theta = 0.5  # A mote is polled only when its predicted drift exceeds this
num_nodes_to_poll_list = [1, 2, 5, 10, 20]  # Polling budgets to simulate, one run per entry
max_value = 40  # Estimates and predictions are clipped to [-max_value, max_value]

# Force pull threshold
force_pull_threshold = 200  # Staleness limit of 200 time steps; not referenced by run_simulation in this cell

# Energy parameters in Joules
E_max = 162000  # Battery capacity in Joules
E_t = 50 / 1000  # Transmission energy in Joules (charged per transmitted reading)
E_s = 10 / 1000  # Sensing energy in Joules (charged per poll)
E_w = 10 / 1000  # Wake-up energy in Joules (the lifetime formula charges 3 * E_w per poll)
E_0 = 1 / 1000   # Sleep energy in Joules (charged for the fraction of steps without a poll)

# Define the BanditArm class
class BanditArm:
    """Running-mean value estimate for one arm of a multi-armed bandit."""

    def __init__(self, initial_value=0.):
        # Number of samples folded into the estimate so far.
        self.N = 0
        self.value_estimate = initial_value

    def update(self, value):
        """Fold ``value`` into the running mean and bump the sample count."""
        samples_before = self.N
        self.N = samples_before + 1
        weighted_total = samples_before * self.value_estimate + value
        self.value_estimate = weighted_total / self.N

def ucb(p_estimate, total_pulls, n):
    """Return the upper confidence bound for an arm.

    Arms that have never been sampled (``n`` not greater than zero) get an
    infinite bound so that they are tried first; otherwise the bound is the
    estimate plus ``sqrt(2 * ln(total_pulls) / n)``.
    """
    if not n > 0:
        return float('inf')
    exploration_bonus = np.sqrt(2 * np.log(total_pulls) / n)
    return p_estimate + exploration_bonus

def run_simulation(num_nodes_to_poll):
    """Simulate UCB-ranked, drift-gated polling with DEWMA estimates.

    Every row of ``pivot_df`` is one time step. Each mote has a
    ``BanditArm`` whose value is the running mean of its rate-of-change
    estimates. Motes are ranked by ``ucb`` score; motes polled on the
    previous step are skipped, and of the remaining top
    ``num_nodes_to_poll`` only those whose predicted drift exceeds
    ``theta`` are polled. Every polled reading is transmitted and scored
    against the prediction made from the pre-update estimate.

    Args:
        num_nodes_to_poll: Maximum number of motes to poll per step.

    Returns:
        ``(average_mse, average_lifetime_years)``: the mean squared
        prediction error over all valid polled readings (NaN when there
        are none) and the mean estimated mote lifetime.
    """
    motes = [f'mote{i}' for i in range(1, 51)]
    state_estimates = dict.fromkeys(motes, initial_state)
    rate_of_change = dict.fromkeys(motes, initial_rate)
    last_update_times = dict.fromkeys(motes, 0)
    polled_count = dict.fromkeys(motes, 0)
    transmitted_count = dict.fromkeys(motes, 0)
    valuable_sensor_data = []
    previously_polled_nodes = set()

    bandit_arms = {mote: BanditArm(initial_value=initial_rate) for mote in motes}
    # Incremented once per time step (not once per polled mote) and used as
    # the "total pulls" term of the UCB score.
    total_pulls = 0

    def predict_node_state(x_hat, x_rate, delta_t):
        # Linear extrapolation, clipped to the allowed value range.
        return np.clip(x_hat + x_rate * delta_t, -max_value, max_value)

    def predicted_drift(mote, current_time_step):
        # How far the estimate is predicted to have moved since the mote's
        # own last update.
        elapsed = current_time_step - last_update_times[mote]
        predicted = predict_node_state(state_estimates[mote], rate_of_change[mote], elapsed)
        return abs(state_estimates[mote] - predicted)

    def select_nodes_to_poll(current_time_step, excluded_nodes):
        # Highest UCB score first; sorted() is stable, so ties (e.g. the
        # infinite score of never-sampled arms) keep mote order.
        ucb_values = {
            mote: ucb(arm.value_estimate, total_pulls, arm.N)
            for mote, arm in bandit_arms.items()
        }
        ranked = sorted(ucb_values, key=ucb_values.get, reverse=True)
        candidates = [mote for mote in ranked if mote not in excluded_nodes][:num_nodes_to_poll]
        return [mote for mote in candidates if predicted_drift(mote, current_time_step) > theta]

    for idx, row in pivot_df.iterrows():
        current_time_step = idx
        total_pulls += 1

        # Select nodes to poll based on UCB values.
        nodes_to_poll = select_nodes_to_poll(current_time_step, previously_polled_nodes)

        for mote in nodes_to_poll:
            # In this variant every polled reading is also transmitted.
            polled_count[mote] += 1
            transmitted_count[mote] += 1

            measured_value = row[mote]
            if pd.isna(measured_value):
                # A missing reading would turn the estimates and the arm's
                # running mean into NaN for the rest of the run. Keep the
                # energy accounting but skip the update and the error
                # record (pandas' mean would ignore the NaN row anyway).
                continue

            previous_state = state_estimates[mote]
            previous_rate = rate_of_change[mote]
            # Clamped to at least 1, so the division below is always safe.
            delta_t = max(current_time_step - last_update_times[mote], 1)

            # Update state estimate based on new measurement.
            new_state = np.clip(
                alpha * measured_value + (1 - alpha) * (previous_state + previous_rate * delta_t),
                -max_value,
                max_value,
            )
            new_rate = np.clip(
                beta * (new_state - previous_state) / delta_t + (1 - beta) * previous_rate,
                -max_value,
                max_value,
            )
            state_estimates[mote] = new_state
            rate_of_change[mote] = new_rate
            # The arm's reward is the signed rate estimate.
            bandit_arms[mote].update(new_rate)

            # Score the prediction that was available before this reading.
            predicted_measurement = predict_node_state(previous_state, previous_rate, delta_t)
            error = abs(measured_value - predicted_measurement)

            valuable_sensor_data.append({
                'index': current_time_step,
                'selected_moteid': mote,
                'temperature': measured_value,
                'predicted_temperature': predicted_measurement,
                'time_elapsed': delta_t,
                'error': error
            })

        for mote in nodes_to_poll:
            last_update_times[mote] = current_time_step
        previously_polled_nodes = set(nodes_to_poll)

    if valuable_sensor_data:
        valuable_sensor_df = pd.DataFrame(valuable_sensor_data)
        valuable_sensor_df['squared_error'] = valuable_sensor_df['error'] ** 2
        average_mse = valuable_sensor_df['squared_error'].mean()
    else:
        # An empty DataFrame has no 'error' column, so the lookup above
        # would raise KeyError; report "no data" as NaN instead.
        average_mse = float('nan')

    # Per-step polling (fw) and transmission (ft) frequencies drive the
    # per-step energy draw of each mote.
    time_steps = len(pivot_df)
    lifetimes = []
    for mote in motes:
        fw = polled_count[mote] / time_steps
        ft = transmitted_count[mote] / time_steps
        lifetimes.append(E_max / (ft * E_t + fw * (E_s + 3 * E_w) + (1 - fw) * E_0))

    # NOTE(review): E_max / energy-per-step is a number of time steps; the
    # division by 3600 treats one step as one second - confirm the sampling
    # interval of the data.
    average_lifetime_hours = np.mean(lifetimes) / 3600
    average_lifetime_years = average_lifetime_hours / 8760

    return average_mse, average_lifetime_years

# Sweep the polling budgets and keep both metrics for each one.
results = {}

for num_nodes_to_poll in num_nodes_to_poll_list:
    print(f"Running simulation for {num_nodes_to_poll} nodes to poll...")
    mse, lifetime = run_simulation(num_nodes_to_poll)
    results[num_nodes_to_poll] = {'MSE': mse, 'Lifetime (years)': lifetime}
    print(f"Completed simulation for {num_nodes_to_poll} nodes to poll.")

# Report the metrics in the order the budgets were simulated.
for num_nodes_to_poll in results:
    metrics = results[num_nodes_to_poll]
    print(f"Nodes to poll: {num_nodes_to_poll}")
    print(f"  Average MSE: {metrics['MSE']}")
    print(f"  Average sensor lifetime (years): {metrics['Lifetime (years)']}")


Running simulation for 1 nodes to poll...
Completed simulation for 1 nodes to poll.
Running simulation for 2 nodes to poll...
Completed simulation for 2 nodes to poll.
Running simulation for 5 nodes to poll...
Completed simulation for 5 nodes to poll.
Running simulation for 10 nodes to poll...
Completed simulation for 10 nodes to poll.
Running simulation for 20 nodes to poll...
Completed simulation for 20 nodes to poll.
Nodes to poll: 1
  Average MSE: 46.130962607888904
  Average sensor lifetime (years): 4.267929174975608
Nodes to poll: 2
  Average MSE: 87.18462888496029
  Average sensor lifetime (years): 3.6903765659137244
Nodes to poll: 5
  Average MSE: 76.06150550519348
  Average sensor lifetime (years): 3.640720505079669
Nodes to poll: 10
  Average MSE: 70.9806230288029
  Average sensor lifetime (years): 3.638421720051902
Nodes to poll: 20
  Average MSE: 66.04893558073658
  Average sensor lifetime (years): 3.6266998715429226
