In [1]:
import numpy as np
import matplotlib.pyplot as plt
from mdp_module import MDP, run_simulation
import seaborn as sns

In [2]:
# Resource Allocation in Sidelink V2X: Example Notebook
# This notebook demonstrates step-by-step how to simulate the 4-state MDP for V2X resource allocation."

In [3]:
# Define parameters for the MDP
num_states  = 4  # s1=idle, s2=success, s3=collision, s4=unavailable
num_actions = 3  # 
T=100 # Number of time steps
s0=0  # Initial state idle
gamma=1.0  # Discount factor (1.0 means no change at MDP)

In [4]:
#Define probabilities
P = np.zeros((num_states, num_actions, num_states))
for a in range(num_actions):
    P[0, a, 0] = 0.1  
    P[0, a, 1] = 0.9  
    P[1, a, 1] = 0.4 - 0.05 * a  
    P[1, a, 2] = 0.6 + 0.05 * a  
    P[2, a, 0] = 1.0  
    P[3, a, 0] = 1.0

In [5]:
#Define rewards
R = np.zeros((num_states, num_actions))
for a in range(num_actions):
    R[1, a] = 10.0  # s2: busy successful
    R[2, a] = -5.0  # s3: collision

In [6]:
#Define policy
pi_b = np.ones((num_states, num_actions)) / num_actions  
pi_e = np.array([
    [0.7, 0.2, 0.1],  
    [0.7, 0.2, 0.1],
    [0.5, 0.3, 0.2],  
    [0.5, 0.3, 0.2]
])

In [7]:
def plot_results(results, namevar):
    eif_vals = results['eif_vals']
    cum_rewards_e = results['cum_rewards_e']
    rho_true = results['rho_true']
    avg_cum_e = results['avg_cum_e']
    rho_drl = results['rho_drl']
    term_vals = results['term_vals']
    cum_rewards_b = results['cum_rewards_b']
    
    sns.set_style("darkgrid")
    sns.set_theme(style="ticks", font_scale=1.25)
    
    # Histogram of EIF values
    fig1 = plt.figure(figsize=(6, 4))
    sns.histplot(eif_vals, bins=20, color='blue', kde=False)
    plt.title('EIF Values Distribution')
    plt.xlabel('EIF')
    plt.ylabel('Frequency')
    plt.savefig('eif_histogram.pdf', bbox_inches='tight')
    plt.close(fig1)
    
    # Histogram of cumulative rewards under pi_e
    fig2 = plt.figure(figsize=(6, 4))
    sns.histplot(cum_rewards_e, bins=20, color='green', kde=False)
    plt.title('Cumulative Rewards under π_e')
    plt.xlabel('Cumulative Reward')
    plt.ylabel('Frequency')
    plt.savefig('cum_rewards_histogram'+namevar+'.pdf', bbox_inches='tight')
    plt.close(fig2)
    
    # Bar plot of rho values
    fig3 = plt.figure(figsize=(6, 4))
    labels = ['True ρ', 'Empirical Avg (π_e)', 'DRL Est']
    values = [rho_true, avg_cum_e, rho_drl]
    sns.barplot(x=labels, y=values, palette=['red', 'orange', 'purple'])
    plt.title('Comparison of Policy Values')
    plt.ylabel('Value')
    plt.ylim(min(values)*0.95, max(values)*1.05)  # Adjust y-axis limits for better visibility; fixed min/max for list
    plt.axes
    plt.savefig('policy_values_bar'+namevar+'.pdf', bbox_inches='tight')
    plt.close(fig3)
    
    # Scatter plot of term_vals vs. cumulative rewards from behavior data
    fig4 = plt.figure(figsize=(6, 4))
    sns.scatterplot(x=cum_rewards_b, y=term_vals, alpha=0.5)
    plt.title('Terms vs. Cum Rewards (Behavior Data)')
    plt.xlabel('Cum Reward (Behavior)')
    plt.ylabel('EIF Terms')
    plt.savefig('terms_scatter'+namevar+'.pdf', bbox_inches='tight')
    plt.close(fig4)
    
    # Running average of EIF terms with empirical rho
    fig5 = plt.figure(figsize=(6, 4))
    running_avg = np.cumsum(term_vals) / np.arange(1, len(term_vals) + 1)
    sns.lineplot(x=range(len(running_avg)), y=running_avg, label='Running Avg Estimate (EIF Terms)')
    running_avg_e = np.cumsum(cum_rewards_e) / np.arange(1, len(cum_rewards_e) + 1)
    sns.lineplot(x=range(len(running_avg_e)), y=running_avg_e, label='Running Avg Empirical (π_e Rewards)')
    plt.axhline(y=np.mean(term_vals), color='r', linestyle='--', label='Final Estimate (EIF)')
    plt.axhline(y=avg_cum_e, color='g', linestyle='-', label='Empirical ρ (π_e Mean)')
    plt.title('Running Average of EIF Estimate vs. Empirical')
    plt.xlabel('Trajectory #')
    plt.ylabel('Estimate of ρ^πᵉ')
    plt.legend()
    plt.savefig('running_average'+str(namevar)+'.pdf', bbox_inches='tight')
    plt.close(fig5)

In [8]:
def plot_results2(results, namevar):
    eif_vals = results['eif_vals']
    cum_rewards_e = results['cum_rewards_e']
    rho_true = results['rho_true']
    avg_cum_e = results['avg_cum_e']
    rho_drl = results['rho_drl']
    term_vals = results['term_vals']
    cum_rewards_b = results['cum_rewards_b']
    
    sns.set_style("darkgrid")
    sns.set_theme(style="ticks", font_scale=1.25)

    fig, axs = plt.subplots(2, 2, figsize=(12, 8))
    
    # Histogram of EIF values
    sns.histplot(eif_vals, bins=20, color='blue', kde=False, ax=axs[0, 0])
    axs[0, 0].set_title('EIF Values Distribution')
    axs[0, 0].set_xlabel('EIF')
    axs[0, 0].set_ylabel('Frequency')

    # Histogram of cumulative rewards under pi_e
    sns.histplot(cum_rewards_e, bins=20, color='green', kde=False, ax=axs[0, 1])
    axs[0, 1].set_title('Cumulative Rewards under π_e')
    axs[0, 1].set_xlabel('Cumulative Reward')
    axs[0, 1].set_ylabel('Frequency')
    
    # Bar plot of rho values
    labels = ['True ρ', 'Empirical Avg (π_e)', 'DRL Est']
    values = [rho_true, avg_cum_e, rho_drl]
    sns.barplot(x=labels, y=values, palette=['red', 'orange', 'purple'], ax=axs[1, 0])
    axs[1, 0].set_title('Comparison of Policy Values')
    axs[1, 0].set_ylabel('Value')
    axs[1, 0].set_ylim(min(values)*0.95, max(values)*1.05)  # Adjust y-axis limits for better visibility; fixed min/max for list

    
    # Scatter plot of term_vals vs. cumulative rewards from behavior data
    sns.scatterplot(x=cum_rewards_b, y=term_vals, alpha=0.5, ax=axs[1, 1])
    axs[1, 1].set_title('Terms vs. Cum Rewards (Behavior Data)')
    axs[1, 1].set_xlabel('Cum Reward (Behavior)')
    axs[1, 1].set_ylabel('EIF Terms')
    
    plt.tight_layout()
    plt.savefig('grouped_plots'+str(namevar)+'.pdf', bbox_inches='tight')
    plt.close(fig)

In [9]:
#run sim simulation
mdp = MDP(num_states, num_actions, T, s0, P, R, pi_b, pi_e, gamma)
results=run_simulation(mdp, 1000, seed=20)

True rho^{pi^e}: 302.5405
Empirical average cumulative reward under pi_e: 303.1150
Average EIF: -2.3441
EIF variance: 2347.7986
Estimated rho from exact terms: 300.1964
DRL estimate (est nuisances): 288.6000
DRL variance (10 runs): 377.1128
Efficiency bound (Var(EIF)/n): 2.3478
DR with misspecified model (Q/V random, mu exact): 299.1285
DR with misspecified weights (mu random, Q/V exact): 300.8342


In [10]:
plot_results(results, "3")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=labels, y=values, palette=['red', 'orange', 'purple'])


In [11]:
plot_results2(results, "3")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=labels, y=values, palette=['red', 'orange', 'purple'], ax=axs[1, 0])
