These functions are used to visualize action probability shifts in RL models. For a continuous action space, use the `continous_density_shift` function; for a discrete action space, use the `discrete_density_shift` function. You only need to pass two sets of actions: the actions taken by the agent with the pretrained model, and the new actions taken during the same rollouts after finetuning (retraining) the RL model. Each function displays a plot of the action probability shift across the actions; it does not return a value.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KernelDensity

In [None]:
def continous_density_shift(pretrained_actions, finetuned_actions, *,
                            bandwidth=0.5, num_points=1000):
    """Plot Gaussian-KDE action densities before and after finetuning.

    Both action collections are flattened into a single column of values,
    a Gaussian kernel density estimate is fitted to each, and the two
    estimated curves are drawn as overlapping filled areas on the current
    matplotlib figure, which is then shown with ``plt.show()``.

    Args:
        pretrained_actions: Array-like of actions taken with the pretrained
            model. Any shape is accepted; every value is treated as one
            one-dimensional sample.
        finetuned_actions: Array-like of actions taken on the same rollouts
            after finetuning. Handled the same way as ``pretrained_actions``.
        bandwidth: Kernel bandwidth forwarded to ``KernelDensity``. The
            default of 0.5 is the value that used to be hard-coded; a smaller
            value gives a less smoothed curve.
        num_points: Number of evenly spaced points, between the smallest and
            largest action seen in either collection, at which the densities
            are evaluated.

    Returns:
        None. The plot is displayed as a side effect.

    Raises:
        ValueError: If either action collection is empty.
    """
    # KernelDensity works on 2-D (n_samples, n_features) input, so all values
    # are reshaped into one feature column.
    pre = np.asarray(pretrained_actions).reshape(-1, 1)
    post = np.asarray(finetuned_actions).reshape(-1, 1)

    # Fail early with a clear message instead of an error raised from deep
    # inside the KDE fit or the min/max reduction below.
    if pre.size == 0 or post.size == 0:
        raise ValueError(
            "continous_density_shift needs at least one action in both "
            "pretrained_actions and finetuned_actions"
        )

    kde_pre = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(pre)
    kde_post = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(post)

    # Shared evaluation grid covering the full range of both datasets so the
    # two curves are directly comparable.
    lowest = min(np.min(pre), np.min(post))
    highest = max(np.max(pre), np.max(post))
    grid = np.linspace(lowest, highest, num_points).reshape(-1, 1)

    # score_samples returns log-densities; exponentiate to get the densities.
    dens_pre = np.exp(kde_pre.score_samples(grid))
    dens_post = np.exp(kde_post.score_samples(grid))

    plt.fill_between(grid[:, 0], dens_pre, alpha=0.5, label='Pretrained')
    plt.fill_between(grid[:, 0], dens_post, alpha=0.5, label='Finetuned')

    plt.xlabel('Action', fontsize=14)
    plt.ylabel('Probability', fontsize=14)
    plt.title('Action Probability Shift', fontsize=14)

    plt.legend(fontsize=11)
    plt.show()

def discrete_density_shift(pretrained_actions, finetuned_actions):
    """Show a grouped bar chart of action counts before and after finetuning.

    The distinct actions in each collection are counted with ``np.unique``.
    Pretrained bars are drawn slightly to the left of each action value and
    finetuned bars slightly to the right, so the two sit side by side. The
    chart is drawn on the current matplotlib figure and shown with
    ``plt.show()``; nothing is returned.

    Args:
        pretrained_actions: Array-like of actions taken with the pretrained
            model. The action values are shifted arithmetically to place the
            bars, so they need to be numeric.
        finetuned_actions: Array-like of actions taken on the same rollouts
            after finetuning.
    """
    bar_width = 0.4
    gap = 0
    # Half of (bar width + gap): how far each bar group is moved away from
    # the action value it belongs to.
    shift = (bar_width + gap) / 2

    # Count both collections before drawing anything.
    pre_values, pre_counts = np.unique(pretrained_actions, return_counts=True)
    post_values, post_counts = np.unique(finetuned_actions, return_counts=True)

    series = (
        (pre_values - shift, pre_counts, 'Pretrained'),
        (post_values + shift, post_counts, 'Finetuned'),
    )
    for positions, heights, name in series:
        plt.bar(positions, heights, bar_width, label=name)

    plt.xlabel('Action', fontsize=16)
    plt.ylabel('Frequency', fontsize=16)
    plt.legend()
    plt.title('Discrete Actions Shift', fontsize=16)
    plt.show()