## Performance analysis of RL models

In [7]:
# Import required libraries for model analysis
import os
import pickle
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from state_env import State  # module with environment and dynamics
from RL_brain_pi_deep import DQNPrioritizedReplay  # sumtree version of DQN

# Model directory analysed by the cells below (edit to inspect another run)
model_dir = 'modelos_exitosos/n10_10amp_10prob'

# Pickled metrics files expected inside model_dir
training_metrics_path, validation_metrics_path = (
    os.path.join(model_dir, pkl_name)
    for pkl_name in ('training_metrics.pkl', 'validation_metrics.pkl')
)

# Saved-model locations inside model_dir (best_model and final_model)
best_model_path, final_model_path = (
    os.path.join(model_dir, model_name)
    for model_name in ('best_model', 'final_model')
)

In [8]:
# Utility function to load training_results.txt as DataFrame
def load_training_results(path):
    """Load a delimited text file of training results into a DataFrame.

    The delimiter is not known in advance, so tab, comma, semicolon and
    whitespace are tried in that order; the first one that yields more than
    one column wins. If none does, the file is read once more as headerless
    data with pandas' default settings.

    Args:
        path: Location of the results file (e.g. ``training_results.txt``).

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the file does not
        exist or cannot be read at all (a message is printed in both cases).
    """
    import os

    import pandas as pd

    if not os.path.exists(path):
        print(f"File not found: {path}")
        return None

    # r'\s+' instead of a single ' ': columns aligned with runs of spaces
    # would otherwise be split into spurious empty "Unnamed" columns.
    for sep in ('\t', ',', ';', r'\s+'):
        try:
            df = pd.read_csv(path, sep=sep)
        except Exception:
            # Best effort: a delimiter that fails to parse is simply skipped.
            continue
        if df.shape[1] > 1:
            return df

    # No delimiter produced several columns: treat the file as one column.
    try:
        return pd.read_csv(path, header=None)
    except Exception as e:
        print(f"Error loading {path}: {e}")
        return None

In [9]:
from ipywidgets import widgets
from IPython.display import display, clear_output

# Root folder whose sub-folders are offered for selection
modelos_exitosos_dir = 'modelos_exitosos'

# Keep only the entries of that folder that are themselves directories
available_dirs = list(filter(
    lambda name: os.path.isdir(os.path.join(modelos_exitosos_dir, name)),
    os.listdir(modelos_exitosos_dir),
))

# Single-choice dropdown; the first directory (if any) is preselected
model_dir_selector = widgets.Dropdown(
    description='Model Dir:',
    options=available_dirs,
    value=next(iter(available_dirs), None),
)

# Output area that update_dashboard renders into
dashboard_output = widgets.Output()

def update_dashboard(change=None):
    """Rebuild the single-directory dashboard inside ``dashboard_output``.

    Loads ``training_results.txt`` from the directory currently chosen in
    ``model_dir_selector``. When the table has more than one column, X/Y
    column pickers and a bin-size slider are shown together with a line plot
    that is redrawn whenever one of them changes; a table with a single
    column is displayed as-is.

    Args:
        change: Unused; present so the function can serve as a widget
            observer callback.
    """
    with dashboard_output:
        clear_output(wait=True)

        chosen = model_dir_selector.value
        if not chosen:
            print('No directory selected.')
            return

        results_path = os.path.join(modelos_exitosos_dir, chosen, 'training_results.txt')
        results = load_training_results(results_path)
        if results is None:
            print('No training_results.txt data available in', chosen)
            return

        col_names = list(results.columns)
        if len(col_names) <= 1:
            display(results)
            return

        x_picker = widgets.Dropdown(options=col_names, description='X axis:')
        if 'max_fidelity' in col_names:
            default_y = 'max_fidelity'
        else:
            default_y = col_names[0]
        y_picker = widgets.Dropdown(options=col_names, value=default_y, description='Y axis:')
        bin_slider = widgets.IntSlider(value=100, min=1, max=1000, step=1, description='Bin size:')
        plot_area = widgets.Output()

        def redraw(change=None):
            # Redraw the plot from the current picker and slider values.
            with plot_area:
                clear_output(wait=True)
                x_name, y_name = x_picker.value, y_picker.value
                width = bin_slider.value
                xs = results[x_name].values
                ys = results[y_name].values

                # Binning only applies when at least one complete bin fits;
                # trailing rows that do not fill a bin are dropped.
                full_bins = len(xs) // width if width > 1 else 0
                if full_bins > 0:
                    used = full_bins * width
                    xs = np.mean(xs[:used].reshape(-1, width), axis=1)
                    ys = np.mean(ys[:used].reshape(-1, width), axis=1)
                    x_label = f'{x_name} (binned, bin size={width})'
                    y_label = f'{y_name} (binned)'
                    heading = f'{y_name} vs {x_name} (binned)'
                else:
                    x_label, y_label = x_name, y_name
                    heading = f'{y_name} vs {x_name}'

                plt.figure(figsize=(8,5))
                plt.plot(xs, ys, marker='o')
                plt.xlabel(x_label)
                plt.ylabel(y_label)
                plt.title(heading)
                plt.grid(True)
                plt.show()

        for control in (x_picker, y_picker, bin_slider):
            control.observe(redraw, names='value')
        display(widgets.VBox([
            widgets.Label(f'Select columns from {chosen}/training_results.txt to plot (with binning option):'),
            widgets.HBox([x_picker, y_picker, bin_slider]),
            plot_area
        ]))
        redraw()

In [10]:
from ipywidgets import widgets
from IPython.display import display, clear_output

# Directory selection for modelos_exitosos (multi-select list)
modelos_exitosos_dir = 'modelos_exitosos'
available_dirs = list(filter(
    lambda name: os.path.isdir(os.path.join(modelos_exitosos_dir, name)),
    os.listdir(modelos_exitosos_dir),
))

# Several directories can be picked at once; the first one is preselected
dirs_selector = widgets.SelectMultiple(
    description='Dirs:',
    options=available_dirs,
    value=tuple(available_dirs[:1]),
    disabled=False,
)

def get_metric_options(selected_dirs):
    """Return the column names of the first usable results file.

    Walks ``selected_dirs`` in order, loading ``training_results.txt`` from
    each one, and returns the columns of the first table that loads and has
    at least one column. Later directories are not read once a match is
    found. Returns an empty list when no directory qualifies.
    """
    # Lazy generator: a file is only loaded when the loop below asks for it.
    tables = (
        load_training_results(
            os.path.join(modelos_exitosos_dir, name, 'training_results.txt')
        )
        for name in selected_dirs
    )
    for table in tables:
        if table is None or len(table.columns) == 0:
            continue
        return list(table.columns)
    return []

# Output widget for dashboard
dashboard_output = widgets.Output()


def update_dashboard_multi(change=None):
    """Rebuild the multi-directory comparison dashboard.

    Loads ``training_results.txt`` from every directory selected in
    ``dirs_selector`` and renders, inside ``dashboard_output``, the directory
    selector, a metric dropdown, a bin-size slider and a plot of the chosen
    metric against the row index of each file.

    Each file is read once per call and reused by the plotting callback, so
    moving the slider or switching the metric does not touch the disk again.
    Files modified on disk are picked up the next time the directory
    selection changes.

    Args:
        change: Unused; present so the function can be used as an observer
            callback.
    """
    # Registered up front so the dashboard keeps reacting to the selector
    # even after one of the early-return paths below.
    dirs_selector.observe(update_dashboard_multi, names='value')

    with dashboard_output:
        clear_output(wait=True)
        selected_dirs = list(dirs_selector.value)
        if not selected_dirs:
            # The selector lives inside this output area and has just been
            # cleared; show it again so a new selection can be made.
            display(dirs_selector)
            print('No directories selected.')
            return

        # Load every selected results file once, keeping selection order.
        frames = {}
        for d in selected_dirs:
            path = os.path.join(modelos_exitosos_dir, d, 'training_results.txt')
            df = load_training_results(path)
            if df is not None and len(df.columns) > 0:
                frames[d] = df
        if not frames:
            display(dirs_selector)
            print('No valid training_results.txt found in selected directories.')
            return

        # Metric choices come from the first directory with a usable file.
        columns = list(next(iter(frames.values())).columns)
        y_default = 'max_fidelity' if 'max_fidelity' in columns else columns[0]
        y_selector = widgets.Dropdown(options=columns, value=y_default, description='Metric:')
        bin_size_selector = widgets.IntSlider(value=100, min=1, max=1000, step=1, description='Bin size:')
        output2 = widgets.Output()

        def on_metric_or_bin_change(change=None):
            # Redraw the comparison plot from the already-loaded tables.
            with output2:
                clear_output(wait=True)
                y = y_selector.value
                bin_size = bin_size_selector.value
                plt.figure(figsize=(10,6))
                plotted = False
                for d, df in frames.items():
                    if y not in df.columns:
                        continue
                    x = df.index.values
                    y_data = df[y].values
                    # Bin only when at least one complete bin fits; rows
                    # that do not fill the last bin are dropped.
                    n_bins = len(x) // bin_size if bin_size > 1 else 0
                    if n_bins > 0:
                        used = n_bins * bin_size
                        x = np.mean(x[:used].reshape(-1, bin_size), axis=1)
                        y_data = np.mean(y_data[:used].reshape(-1, bin_size), axis=1)
                    plt.plot(x, y_data, marker='o', label=d)
                    plotted = True
                plt.xlabel('Episode (binned if bin size > 1)')
                plt.ylabel(y)
                plt.title(f'{y} vs Episode for selected directories')
                if plotted:
                    # legend() warns when there is no labelled line to list.
                    plt.legend()
                plt.grid(True)
                plt.show()

        y_selector.observe(on_metric_or_bin_change, names='value')
        bin_size_selector.observe(on_metric_or_bin_change, names='value')
        display(widgets.VBox([
            widgets.Label('Select directories and metric to compare:'),
            dirs_selector,
            y_selector,
            bin_size_selector,
            output2
        ]))
        on_metric_or_bin_change()


display(dashboard_output)
update_dashboard_multi()

Output()

In [11]:
# Load training and validation metrics from pickle files
def load_metrics(metrics_path):
    """Unpickle and return the object stored at ``metrics_path``.

    Prints a notice and returns ``None`` when the path does not exist.
    """
    if not os.path.exists(metrics_path):
        print(f"File not found: {metrics_path}")
        return None
    # NOTE: unpickling can execute arbitrary code; only open trusted files.
    with open(metrics_path, 'rb') as handle:
        return pickle.load(handle)

# Load both metrics files (either result is None when its file is missing)
training_metrics, validation_metrics = map(
    load_metrics, (training_metrics_path, validation_metrics_path)
)

# Print the keys of each non-empty metrics object to reveal its structure
for _title, _metrics in (
    ('Training metrics keys:', training_metrics),
    ('Validation metrics keys:', validation_metrics),
):
    if _metrics:
        print(_title, _metrics.keys())

# Plot training and validation statistics (example: loss and fidelity)
def plot_metrics(training_metrics, validation_metrics, metric_name):
    """Plot one metric from the training and validation dicts on one figure.

    A curve is drawn for each of the two containers that is truthy and
    contains ``metric_name``; a container that lacks the key is skipped.
    The x axis is labelled 'Epoch' and the y axis carries the capitalised
    metric name.
    """
    plt.figure(figsize=(8,5))
    sources = (('Training', training_metrics), ('Validation', validation_metrics))
    for prefix, metrics in sources:
        if not metrics or metric_name not in metrics:
            continue
        plt.plot(metrics[metric_name], label=f'{prefix} {metric_name}')
    pretty_name = metric_name.capitalize()
    plt.xlabel('Epoch')
    plt.ylabel(pretty_name)
    plt.title(f'{pretty_name} over Epochs')
    plt.legend()
    plt.grid(True)
    plt.show()

# Example usage (uncomment and replace the placeholder metric names with keys
# printed above; a curve is drawn only for a dict that contains the key):
# plot_metrics(training_metrics, validation_metrics, 'loss')
# plot_metrics(training_metrics, validation_metrics, 'fidelity')

Training metrics keys: dict_keys(['soft_success_training_rate', 'true_success_training_rate', 'training max fidelity', 'training average fidelity', 'average QValue', 'training_time', 'number_of_episodes', 'pruned'])
Validation metrics keys: dict_keys(['general_val_fidelity', 'average_val_fidelity', 'average_time_max_fidelity', 'average_QValue', 'validation_episodes'])


# Validation

In [12]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import Dropdown, IntText, VBox, HBox, Output, Button
from IPython.display import display, clear_output
import tensorflow as tf
from state_env import State
from RL_brain_pi_deep import DQNPrioritizedReplay
import configparser

# Sub-folders of modelos_exitosos offered in the validation dropdown
modelos_exitosos_dir = 'modelos_exitosos'
available_dirs = list(filter(
    lambda name: os.path.isdir(os.path.join(modelos_exitosos_dir, name)),
    os.listdir(modelos_exitosos_dir),
))

def get_ini_path(model_dir):
    """Return the path of the ``.ini`` file inside ``model_dir``.

    Directory entries are examined in sorted order so that, when several
    ``.ini`` files are present, the same one is chosen on every run
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        model_dir: Directory to search; sub-directories are not descended.

    Returns:
        ``os.path.join(model_dir, name)`` for the alphabetically first entry
        whose name ends in ``.ini``, or ``None`` when there is none.

    Raises:
        OSError: If ``model_dir`` cannot be listed (e.g. it does not exist).
    """
    ini_names = (
        name for name in sorted(os.listdir(model_dir)) if name.endswith('.ini')
    )
    first_ini = next(ini_names, None)
    if first_ini is None:
        return None
    return os.path.join(model_dir, first_ini)

# Controls of the validation dashboard
dir_selector = Dropdown(description='Dir:', options=available_dirs)
val_episodes_input = IntText(description='Val episodes:', value=10)
run_button = Button(button_style='success', description='Run Validation')

# Area that receives the messages, plots and table of each validation run
output = Output()

def run_validation_and_plot(change=None):
    """Run validation episodes for the selected model and plot the results.

    Reads the ``.ini`` file found in the selected directory, restores
    ``best_model/model.ckpt`` into a fresh TF1 graph and session, and plays
    ``val_episodes_input.value`` episodes of ``max_t_steps`` steps each,
    choosing actions with ``eval=True``. For every episode it records the
    summed reward ('Qvalue'), the highest fidelity seen and the step at which
    it occurred, the fidelity and index of the last step, and the agent's
    ``epsilon``. The results are shown as a per-episode line plot, a bar
    chart of the max and mean of the per-episode max fidelity, and the head
    of the results table.

    All inputs are validated before the network is built, so an invalid
    episode count, a missing ini file or a missing checkpoint only prints a
    message inside ``output``.

    Args:
        change: Unused; present so the function can be used as a button
            click handler.
    """
    with output:
        clear_output(wait=True)
        selected_dir = dir_selector.value
        validation_episodes = val_episodes_input.value

        # The dropdown has no value when there are no model directories.
        if not selected_dir:
            print('No directory selected.')
            return
        # IntText accepts zero and negative numbers; with no episodes the
        # results table would be empty and the plotting below would fail.
        if validation_episodes is None or validation_episodes < 1:
            print('Val episodes must be at least 1.')
            return

        model_dir = os.path.join(modelos_exitosos_dir, selected_dir)
        ini_path = get_ini_path(model_dir)
        if not ini_path:
            print('No ini file found in', model_dir)
            return
        config_instance = configparser.ConfigParser()
        config_instance.read(ini_path)

        # Without at least one step the per-episode loop never assigns the
        # final fidelity or step index that are recorded afterwards.
        max_t_steps = config_instance.getint('system_parameters', 'max_t_steps')
        if max_t_steps < 1:
            print('max_t_steps must be at least 1, got', max_t_steps)
            return

        # Checked before building the network: nothing to restore means
        # there is no point in creating the graph and session.
        checkpoint_path = os.path.join(model_dir, 'best_model', 'model.ckpt')
        if not tf.io.gfile.exists(checkpoint_path + '.index'):
            print('Checkpoint not found:', checkpoint_path)
            return

        tf.compat.v1.reset_default_graph()
        results = []
        with tf.compat.v1.Session() as sess:
            RL_val = DQNPrioritizedReplay(config_instance=config_instance, sess=sess)
            saver = tf.compat.v1.train.Saver()
            saver.restore(sess, checkpoint_path)
            env = State(config_instance=config_instance)
            for episode in range(validation_episodes):
                observation = env.reset()
                Q = 0
                fid_max = 0
                t_fid_max = 0
                for step in range(max_t_steps):
                    action = RL_val.choose_action(observation, eval=True)
                    observation_, reward, fidelity = env.step(action)
                    observation = observation_.copy()
                    Q += reward
                    if fidelity > fid_max:
                        fid_max = fidelity
                        t_fid_max = step
                results.append({
                    'episode': episode,
                    'Qvalue': Q,
                    'max_fidelity': fid_max,
                    'time_max_fidelity': t_fid_max,
                    'final_fidelity': fidelity,
                    'time_final_fidelity': step,
                    'epsilon': RL_val.epsilon
                })
        results_df = pd.DataFrame(results)

        # Dashboard plots
        fig, axs = plt.subplots(1, 2, figsize=(14,5))
        axs[0].plot(results_df['episode'], results_df['max_fidelity'], label='Max Fidelity')
        axs[0].plot(results_df['episode'], results_df['final_fidelity'], label='Final Fidelity')
        axs[0].set_xlabel('Episode')
        axs[0].set_ylabel('Fidelity')
        axs[0].set_title('Validation Fidelities per Episode')
        axs[0].legend()
        axs[0].grid(True)

        # Bar plot of the max and mean of the per-episode max fidelity
        max_fid = np.max(results_df['max_fidelity'])
        mean_fid = np.mean(results_df['max_fidelity'])
        axs[1].bar(['Max Fidelity', 'Mean Fidelity'], [max_fid, mean_fid], color=['tab:blue', 'tab:orange'])
        axs[1].set_ylim(0, 1.05)
        axs[1].set_title('Max and Mean Validation Fidelity')
        for bar_pos, bar_value in enumerate([max_fid, mean_fid]):
            # Value label just above each bar
            axs[1].text(bar_pos, bar_value + 0.01, f'{bar_value:.4f}', ha='center')
        plt.tight_layout()
        plt.show()
        display(results_df.head())


run_button.on_click(run_validation_and_plot)
display(VBox([HBox([dir_selector, val_episodes_input, run_button]), output]))

VBox(children=(HBox(children=(Dropdown(description='Dir:', options=('n10_10amp_50prob', 'n16_20amp_20prob', 'n…