### Imports

In [1]:
import pickle as pkl
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os


In [138]:
# Color definitions (one color per model family).
# NOTE(review): color_NLF2, color_NLF3 and color_RNN are assigned again, with
# different values, in the Fig2A cell further down; the later assignment wins
# once that cell has run.
color_LF = 'xkcd:muted blue'
color_NLF = 'xkcd:coral pink'
color_NLF2 = 'goldenrod'
color_NLF3 = 'xkcd:sage green'
color_RNN = 'xkcd:muted purple'

# Global matplotlib defaults applied to every figure created after this cell.
plt.rcParams.update({
    # Font sizes and label weight
    'font.size': 14,
    'axes.titlesize': 30,
    'axes.labelsize': int(40 * 1.3),
    'xtick.labelsize': 35,
    'ytick.labelsize': 35,
    'legend.fontsize': 30,
    'legend.title_fontsize': 30,
    'axes.labelweight': 'bold',
    
    # Axes (spine) line width
    'axes.linewidth': 5,
    
    # Default width of plotted lines
    'lines.linewidth': 8,
    
    # Grid line width
    'grid.linewidth': 2.0,
    
    # Default figure size (inches) and on-screen resolution
    'figure.figsize': (10, 6),
    'figure.dpi': 100,
    
    # Tick geometry
    'xtick.major.size': 5,
    'xtick.minor.size': 3,
    'ytick.major.size': 5,
    'ytick.minor.size': 3,
    'xtick.major.width': 1.5,
    'xtick.minor.width': 1,
    'ytick.major.width': 1.5,
    'ytick.minor.width': 1,
    'xtick.direction': 'out',
    'ytick.direction': 'out',
    
    # Grid transparency and marker size
    'grid.alpha': 0.3,
    'lines.markersize': 8,
    
    # Font family, with fallbacks tried in the listed order
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial', 'Helvetica', 'DejaVu Sans'],
    
    # Legend frame styling (the frame itself is switched off below)
    'legend.framealpha': 0.8,
    'legend.edgecolor': '0.8',
    
    # Automatically apply tight layout to figures
    'figure.autolayout': True,
    
    # Defaults used by savefig
    'savefig.dpi': 300,
    'savefig.format': 'png',
    
    # Grid on by default, drawn below the plotted data
    'axes.grid': True,
    'axes.axisbelow': True,
})

# Remove the legend frame and the top/right spines.
plt.rcParams['legend.frameon'] = False
plt.rcParams['axes.spines.top'] = False
plt.rcParams['axes.spines.right'] = False

### Load data

**Data description:**

  *RNN*
- format: dictionary
- key:   tuple (training choice, testing choice)
- value: testing accuracy

*NLF_k*
- format: dictionary
- key:  tuple (training choice, NLF window size, testing choice)
- value:  testing accuracy

*Ranges*
- training choice: [1,8]
- testing choice: [1,8]
- NLF window size: 2,3
- time steps: 500

*Experiment types*
- Levy
- Levy with an equal probability assigned to every k value
- Non-Levy (the file names carry no explicit "non-Levy" marker, e.g. classifier_accuracies_500steps.pkl)

#### Helpers

In [117]:
# Result folders, one per model family, resolved against the current working
# directory at the time this cell runs.
_BASE_DIR = os.getcwd()
RNN_path = os.path.join(_BASE_DIR, 'RNN')
NLFw_path = os.path.join(_BASE_DIR, 'NLFw')
NLF_path = os.path.join(_BASE_DIR, 'NLF')
LF_path = os.path.join(_BASE_DIR, 'LF')

# Random seeds of the RNN runs.
seeds = [1000 * run for run in range(1, 6)]

In [118]:
# Custom exception raised by the loaders below when an expected results file
# does not exist.
class NoFilesFoundError(Exception):
    """Raised when an expected accuracy pickle file is missing on disk."""
    
def load_rnn_accuracies(base_path=RNN_path, seeds=(1000, 2000, 3000, 4000, 5000), steps=500, k_range=range(1, 9), include_levy=False, eql_distr=False, stats=False):
    """
    Load RNN test accuracies from pickle files and summarize them.

    Depending on the flags, either the per-k (non-Levy) files, the Levy files or the
    equal-distribution Levy files are read for every seed. The values found under the
    same key are collected into one list per key and handed to RNN_summary.

    Args:
        base_path (str): Directory containing the RNN pickle files. Default is RNN_path.
        seeds (iterable): Random seeds whose files are loaded. Default is
            (1000, 2000, 3000, 4000, 5000). The argument is only iterated, never modified.
        steps (int): Number of time steps encoded in the file names. Default is 500.
        k_range (iterable): Training k values to load; only used for non-Levy files.
            Default is range(1, 9).
        include_levy (bool): If True, load Levy files instead of the per-k files. Default is False.
        eql_distr (bool): If True and include_levy is True, load the equal-distribution
            Levy files. Ignored when include_levy is False. Default is False.
        stats (bool): Passed to RNN_summary; if True the mean and standard deviation per
            (train, test) pair are returned instead of the raw values. Default is False.

    Returns:
        pandas.DataFrame: The frame produced by RNN_summary.

    Raises:
        NoFilesFoundError: As soon as one of the expected files does not exist. The
            message names the missing file.
    """
    # Build the list of (seed, file name) pairs up front so that a single loop
    # can do the loading for all three experiment types.
    if include_levy:
        print("Loading Levy files")  # Debug print
        if eql_distr:
            print("Loading equal distribution k Levy files")  # Debug print
            targets = [
                (seed, f'RNN_train_levy_seed_{seed}_{steps}steps_eqldistr_k_testaccuracies.pkl')
                for seed in seeds
            ]
        else:
            targets = [
                (seed, f'RNN_train_levy_seed_{seed}_{steps}steps_testaccuracies.pkl')
                for seed in seeds
            ]
    else:
        print("Loading non-Levy files")  # Debug print
        targets = [
            (seed, f'RNN_train_k_{k}_seed_{seed}_{steps}steps_testaccuracies.pkl')
            for seed in seeds
            for k in k_range
        ]

    all_runs = {}
    for seed, file_name in targets:
        file_path = os.path.join(base_path, file_name)
        if not os.path.exists(file_path):
            raise NoFilesFoundError(
                f"No file found in {base_path} with seed={seed} and steps={steps} "
                f"(expected {file_name})"
            )

        # NOTE: pickle can execute arbitrary code while loading; only point this
        # at result files you produced yourself.
        with open(file_path, 'rb') as file:
            data = pkl.load(file)

        # Collect the values of every file under their shared key.
        for key, value in data.items():
            all_runs.setdefault(key, []).append(value)

    print(f"Number of items in all_runs: {len(all_runs)}")  # Debug print
    return RNN_summary(all_runs, stats)


def RNN_summary(datadict, stats=False):
    """
    Turn a dictionary of RNN accuracies into a long-format DataFrame.

    Every (train, test) key contributes one row per accuracy value in its list.
    With stats enabled the rows are reduced to one row per (train, test) pair.

    Args:
        datadict (dict): Maps (train, test) tuples to lists of accuracies.
        stats (bool): If True, return the mean and the sample standard deviation
            (ddof=1) per (train, test) pair. Otherwise return the raw rows. Default is False.

    Returns:
        pandas.DataFrame: Columns ['train', 'test', 'accuracy'] when stats is False,
            or ['train', 'test', 'mean_accuracy', 'std_accuracy'] when stats is True.
    """

    def _sample_std(values):
        # Sample standard deviation (ddof=1) of the accuracies in one group.
        return np.std(values, ddof=1)

    # One record per key; the list of accuracies is expanded into rows below.
    records = [(pair[0], pair[1], accs) for pair, accs in datadict.items()]
    long_df = pd.DataFrame(records, columns=['train', 'test', 'accuracy']).explode('accuracy')

    if not stats:
        return long_df

    summary = (
        long_df.groupby(['train', 'test'])['accuracy']
        .agg(['mean', _sample_std])
        .reset_index()
    )
    summary.columns = ['train', 'test', 'mean_accuracy', 'std_accuracy']
    return summary
    

def load_fusion_accuracies(base_path=NLFw_path, steps=500, k_range=range(1, 9), include_levy=False, eql_distr=False):
    """
    Load NLFw (Non-Linear Fusion with sliding time windows), NLF or LF accuracies from one pickle file.

    Exactly one file is read from base_path: the regular (non-Levy) file, the Levy file
    or the equal-distribution Levy file, depending on the flags. Each value is wrapped in
    a single-element list and the result is summarized with fusion_summary.

    Args:
        base_path (str): Directory containing the pickle file. Default is NLFw_path.
        steps (int): Number of time steps encoded in the file name. Default is 500.
        k_range (range): Unused; kept so existing calls keep working. Default is range(1, 9).
        include_levy (bool): If True, load a Levy file instead of the regular file. Default is False.
        eql_distr (bool): If True and include_levy is True, load the equal-distribution
            Levy file. Default is False.

    Returns:
        dict: The dictionary produced by fusion_summary (one DataFrame per window-size key).

    Raises:
        NoFilesFoundError: If the selected file does not exist in base_path.
    """
    # Pick the file that matches the requested experiment type.
    if include_levy and eql_distr:
        file_path = os.path.join(base_path, f'classifier_accuracies_{steps}steps_levy_eqldistr_k.pkl')
    elif include_levy:
        file_path = os.path.join(base_path, f'classifier_accuracies_{steps}steps_levy.pkl')
    else:
        print("Loading non-Levy files")  # Debug print
        file_path = os.path.join(base_path, f'classifier_accuracies_{steps}steps.pkl')

    if not os.path.exists(file_path):
        raise NoFilesFoundError(f"No file found in {base_path} with steps={steps}")

    with open(file_path, 'rb') as handle:
        loaded = pkl.load(handle)

    # fusion_summary expects a list of accuracies per key; there is one file,
    # hence one value per key.
    all_runs = {key: [value] for key, value in loaded.items()}

    print(f"Number of items in all_runs: {len(all_runs)}")  # Debug print
    return fusion_summary(all_runs)

    
def fusion_summary(datadict, accuracy_mult_by_100=True):
    """
    Summarize NLFw (Non-Linear Fusion with sliding time windows), NLF or LF accuracy data.

    The dictionary is expanded into one row per accuracy value, split by the second
    element of the key (the window size), and averaged per (train, test) pair.

    Args:
        datadict (dict): Maps (train, windowsize, test) tuples to lists of accuracies.
        accuracy_mult_by_100 (bool): If True, multiply the mean accuracy by 100. Default is True.

    Returns:
        dict: Keys are the window-size values in order of first appearance; each value is
            a DataFrame with columns ['train', 'test', 'mean_accuracy', 'std_accuracy'].
            'std_accuracy' is filled with None because the plotting helpers expect the column.
    """
    records = [(key[0], key[1], key[2], accs) for key, accs in datadict.items()]
    long_df = pd.DataFrame(
        records, columns=['train', 'windowsize', 'test', 'accuracy']
    ).explode('accuracy')

    summaries = {}
    for window in long_df['windowsize'].unique():
        window_rows = long_df[long_df['windowsize'] == window]

        table = window_rows.groupby(['train', 'test'])['accuracy'].agg(['mean']).reset_index()
        table.columns = ['train', 'test', 'mean_accuracy']
        if accuracy_mult_by_100:
            table['mean_accuracy'] *= 100
        table['std_accuracy'] = None  # Required column for plotting functionality

        summaries[window] = table
    return summaries
      

#### Load

In [None]:
# Non-Levy results at 500 time steps. The Levy variants stay commented out;
# uncomment the matching line to load them as well.
RNN_nonlevy = load_rnn_accuracies(steps=500, include_levy=False, eql_distr=False, stats=True)
#RNN_levy = load_rnn_accuracies(steps=500, include_levy=True, eql_distr=False, stats=True)
#RNN_levy_eqldistr = load_rnn_accuracies(steps=500, include_levy=True, eql_distr=True, stats=True)

# NLFw uses the default base_path of load_fusion_accuracies.
NLFw_nonlevy = load_fusion_accuracies(steps=500, include_levy=False, eql_distr=False)
#NLFw_levy = load_fusion_accuracies(steps=500, include_levy=True, eql_distr=False)
#NLFw_levy_eqldistr = load_fusion_accuracies(steps=500, include_levy=True, eql_distr=True)

LF_nonlevy = load_fusion_accuracies(base_path=LF_path, steps=500, include_levy=False, eql_distr=False)
#LF_levy = load_fusion_accuracies(base_path=LF_path, steps=500, include_levy=True, eql_distr=False)
#LF_levy_eqldistr = load_fusion_accuracies(base_path=LF_path, steps=500, include_levy=True, eql_distr=True)

NLF_nonlevy = load_fusion_accuracies(base_path=NLF_path, steps=500, include_levy=False, eql_distr=False)
#NLF_levy = load_fusion_accuracies(base_path=NLF_path, steps=500, include_levy=True, eql_distr=False)
#NLF_levy_eqldistr = load_fusion_accuracies(base_path=NLF_path, steps=500, include_levy=True, eql_distr=True)

### Plot

#### Train & Test with error bars

In [130]:
# Plot all train-test scenarios with error bars
def plot_with_error_bars(df, ax=None, title=None, label=None, color=None, ylim=(76, 90)):
    """
    Plot mean accuracy against the test value, one line per training value.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train'.
            An optional 'std_accuracy' column adds error bars.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is None.
        label (str): Legend label used for every line. When empty, each line is
            labelled with its training value.
        color: Color used for every line. When empty, each training value gets its
            own color from the plasma colormap.
        ylim (tuple): (bottom, top) limits of the y axis. Default is (76, 90).
            Pass None to leave the limits untouched.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing from df.
    """
    for col in ('test', 'mean_accuracy', 'train'):
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    unique_trains = df['train'].unique()
    colors = plt.cm.plasma(np.linspace(0, 1, len(unique_trains)))
    color_map = dict(zip(unique_trains, colors))
    has_std_column = 'std_accuracy' in df.columns

    for train_value in unique_trains:
        subset = df[df['train'] == train_value]
        plot_color = color if color else color_map[train_value]

        # Draw the line first so that it carries the legend entry.
        ax.plot(subset['test'], subset['mean_accuracy'],
                linestyle='-',
                color=plot_color,
                label=label if label else f'{train_value}')

        # Error bars only make sense when at least one std value is present.
        if has_std_column and not subset['std_accuracy'].isna().all():
            ax.errorbar(subset['test'], subset['mean_accuracy'],
                        yerr=subset['std_accuracy'],
                        fmt='|',
                        capsize=2,
                        capthick=1,
                        elinewidth=2,
                        color=plot_color)

    ax.set_xlabel('Test')
    ax.set_ylabel('Accuracy')
    ax.set_title(title)
    # A single legend call is enough; a second call would only replace the first.
    ax.legend(title='Train', ncol=2)
    if ylim is not None:
        ax.set_ylim(*ylim)
    return ax

#plot_with_error_bars(RNN_nonlevy, title='RNN')

In [None]:
def plot_diagonal_only(df, ax=None, title=None, label=None, color='grey'):
    """
    Plot the mean accuracy of the rows where the train value equals the test value.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train'.
            An optional 'std_accuracy' column adds error bars when it holds at least
            one non-missing value on the diagonal.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is None.
        label (str): Legend label of the line. Default is None.
        color: Color of the line and the error bars. Default is 'grey'.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing from df.
    """
    missing = [name for name in ('test', 'mean_accuracy', 'train') if name not in df.columns]
    if missing:
        raise ValueError(f"Column '{missing[0]}' not found in DataFrame")

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    # Keep only train == test rows, ordered by the training value.
    on_diagonal = df.loc[df['train'] == df['test']].sort_values('train')
    x_values = on_diagonal['test']
    y_values = on_diagonal['mean_accuracy']

    ax.plot(x_values, y_values,
            linestyle='-',
            marker='o',
            color=color,
            linewidth=2,
            label=label)

    has_std = 'std_accuracy' in df.columns and not on_diagonal['std_accuracy'].isna().all()
    if has_std:
        ax.errorbar(x_values, y_values,
                    yerr=on_diagonal['std_accuracy'],
                    fmt='o',
                    capsize=4,
                    capthick=2,
                    elinewidth=2,
                    color=color)

    ax.set_xlabel('Train k == test k')
    ax.set_ylabel('Accuracy')
    ax.set_title(title)
    ax.legend(ncol=2)
    ax.grid(True)

    # One tick per diagonal entry, labelled with its train/test value.
    ax.set_xticks(x_values)
    ax.set_xticklabels(x_values)

    return ax


# Figure palette (reassigns the module-level color names).
color_LF = 'xkcd:muted blue'
color_NLF = 'xkcd:coral pink'
color_NLF2 = '#60AB9E'
color_NLF3 = '#BEBC48'
color_RNN = '#A778B4'

# Fig. 2A: diagonal (train k == test k) accuracy of every model on shared axes.
# The first call creates the axes; the later calls draw onto it.
_fig2a_series = [
    (LF_nonlevy['LF'], 'LF', color_LF),
    (NLF_nonlevy['NLF_1'], 'NLF', color_NLF),
    (NLFw_nonlevy[2], 'NLF 2', color_NLF2),
    (NLFw_nonlevy[3], 'NLF 3', color_NLF3),
    (RNN_nonlevy, 'RNN', color_RNN),
]
ax = None
for _frame, _name, _shade in _fig2a_series:
    ax = plot_diagonal_only(_frame, ax=ax, label=_name, color=_shade)

plt.ylim(75, 90)
#plt.yticks([75, 80, 85, 90])
plt.savefig('Fig2A.svg', dpi=300)

#### Train & Test with shaded regions

In [125]:
def plot_with_shaded_errors_all(df, ax=None, title='Accuracy', label=None, shade_color='blue', alpha=0.2, show_lines=False, show_markers=False, add_colorbar=True):
    """
    Shade the band between the lowest and highest mean accuracy at every test value.

    The band spans all training values. Optionally the individual training curves
    are drawn on top, colored by training value with the viridis colormap.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train'.
            The frame is not modified.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is 'Accuracy'.
        label (str): Legend label of the band; a legend is only drawn when it is set.
        shade_color: Color of the band; 'gray' is used when None. Default is 'blue'.
        alpha (float): Opacity of the band. Default is 0.2.
        show_lines (bool): Draw one line per training value. Default is False.
        show_markers (bool): Draw markers for every data point. Default is False.
        add_colorbar (bool): Add a colorbar for the training values. Default is True.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing or df has no rows.
    """
    for col in ('test', 'mean_accuracy', 'train'):
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    # assign() returns a new frame, so the numeric coercion never writes into
    # the caller's DataFrame (which may be a slice of another frame).
    data = df.assign(mean_accuracy=pd.to_numeric(df['mean_accuracy'], errors='coerce'))
    data = data.sort_values(['train', 'test'])

    unique_trains = sorted(data['train'].unique())
    if not unique_trains:
        raise ValueError("DataFrame has no rows to plot")

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    # Color gradient over the training values.
    cmap = plt.cm.viridis
    norm = plt.Normalize(vmin=min(unique_trains), vmax=max(unique_trains))

    # Envelope of the mean accuracy across all training values, per test value.
    envelope = data.groupby('test')['mean_accuracy'].agg(['min', 'max'])
    ax.fill_between(envelope.index, envelope['min'], envelope['max'],
                    alpha=alpha, color='gray' if shade_color is None else shade_color, label=label)

    if show_lines or show_markers:
        marker_kwargs = {'marker': 'o'} if show_markers else {}
        for train_value in unique_trains:
            subset = data[data['train'] == train_value]
            plot_color = cmap(norm(train_value))

            if show_lines:
                ax.plot(subset['test'], subset['mean_accuracy'],
                        linestyle='-',
                        color=plot_color,
                        label=f'Train {train_value}',
                        **marker_kwargs)

            if show_markers:
                ax.scatter(subset['test'], subset['mean_accuracy'],
                           color=plot_color)

    ax.set_ylim((75, 90))
    ax.set_xlabel('Test')
    ax.set_ylabel('Mean Accuracy')
    ax.set_title(title)
    ax.grid(True)

    if label:
        ax.legend()

    if add_colorbar:
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=ax, label='Train Value')
        cbar.set_ticks(unique_trains)

    return ax


### Dev Start

In [42]:
# COPY: development variant of plot_with_shaded_errors_all with fixed x ticks.
# Running this cell replaces the earlier definition of the same name.
def plot_with_shaded_errors_all(df, ax=None, title='Accuracy', label=None, shade_color='blue', alpha=0.2, show_lines=False, show_markers=False, add_colorbar=True):
    """
    Shade the band between the lowest and highest mean accuracy at every test value.

    Same as the plain variant, except that the x axis gets fixed ticks: the test
    codes -1 and 0 are labelled 'Lévy' and 'Lévy flat', followed by 1 to 8.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train'.
            The frame is not modified.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is 'Accuracy'.
        label (str): Legend label of the band; a legend is only drawn when it is set.
        shade_color: Color of the band; 'gray' is used when None. Default is 'blue'.
        alpha (float): Opacity of the band. Default is 0.2.
        show_lines (bool): Draw one line per training value. Default is False.
        show_markers (bool): Draw markers for every data point. Default is False.
        add_colorbar (bool): Add a colorbar for the training values. Default is True.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing or df has no rows.
    """
    for col in ('test', 'mean_accuracy', 'train'):
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    # assign() returns a new frame, so the numeric coercion never writes into
    # the caller's DataFrame (which may be a slice of another frame).
    data = df.assign(mean_accuracy=pd.to_numeric(df['mean_accuracy'], errors='coerce'))
    data = data.sort_values(['train', 'test'])

    unique_trains = sorted(data['train'].unique())
    if not unique_trains:
        raise ValueError("DataFrame has no rows to plot")

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    # Color gradient over the training values.
    cmap = plt.cm.viridis
    norm = plt.Normalize(vmin=min(unique_trains), vmax=max(unique_trains))

    # Envelope of the mean accuracy across all training values, per test value.
    envelope = data.groupby('test')['mean_accuracy'].agg(['min', 'max'])
    ax.fill_between(envelope.index, envelope['min'], envelope['max'],
                    alpha=alpha, color='gray' if shade_color is None else shade_color, label=label)

    if show_lines or show_markers:
        marker_kwargs = {'marker': 'o'} if show_markers else {}
        for train_value in unique_trains:
            subset = data[data['train'] == train_value]
            plot_color = cmap(norm(train_value))

            if show_lines:
                ax.plot(subset['test'], subset['mean_accuracy'],
                        linestyle='-',
                        color=plot_color,
                        label=f'Train {train_value}',
                        **marker_kwargs)

            if show_markers:
                ax.scatter(subset['test'], subset['mean_accuracy'],
                           color=plot_color)

    # Fixed ticks: -1 and 0 are the codes used for the two Lévy test conditions.
    custom_xticks = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8]
    custom_xlabels = ['Lévy', 'Lévy flat', '1', '2', '3', '4', '5', '6', '7', '8']
    ax.set_xticks(custom_xticks)
    ax.set_xticklabels(custom_xlabels)

    # Rotate the labels so the longer Lévy labels stay readable.
    plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

    ax.set_ylim((75, 90))
    ax.set_xlabel('Test')
    ax.set_ylabel('Mean Accuracy')
    ax.set_title(title)
    ax.grid(True)

    if label:
        ax.legend()

    if add_colorbar:
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=ax, label='Train Value')
        cbar.set_ticks(unique_trains)

    return ax


In [43]:
# Main plotting code: restrict every model's summary to the training values of interest.
train_values = [1, 2, 3, 4, 5, 6, 7, 8]


def _restrict_to_train_values(frame):
    """Return the rows of `frame` whose 'train' value is listed in train_values."""
    return frame[frame['train'].isin(train_values)]


df = _restrict_to_train_values(NLF_nonlevy['NLF_1'])
df2 = _restrict_to_train_values(NLFw_nonlevy[2])
df3 = _restrict_to_train_values(NLFw_nonlevy[3])
dfrnn = _restrict_to_train_values(RNN_nonlevy)

In [47]:
# Test metrics of the NLFw classifiers, read from two pickle files. Both
# objects are iterated with .items() by split_NLFw_onlevy below.
# NOTE(review): the files are .pkl, so np.load is presumably falling back to
# unpickling here (allow_pickle=True); only load files from a trusted source.
# NOTE(review): the paths are relative to the working directory ('./NLFw/...'),
# while later cells read similar files from '../' - confirm which location is intended.
datalevy = np.load('./NLFw/NLFw_classifier_testmetrics_testedonlevy_trainedonk.pkl', allow_pickle=True)
datalevyeq = np.load('./NLFw/NLFw_classifier_testmetrics_testedonlevy_eq_trainedonk.pkl', allow_pickle=True)

def split_NLFw_onlevy(data_dict):
    """
    Split a metrics dictionary by window size and return one DataFrame per size.

    Args:
        data_dict (dict): Keys are tuples whose second element is the window size.
            Entries with a window size other than 2 or 3 are dropped.

    Returns:
        tuple: (frame for window size 2, frame for window size 3). Each frame is
            built with the dictionary keys as its index.
    """

    def _frame_for(window):
        # Keep only the entries recorded for this window size.
        selected = {key: value for key, value in data_dict.items() if key[1] == window}
        return pd.DataFrame.from_dict(selected, orient='index')

    return _frame_for(2), _frame_for(3)

def _levy_rows(accuracy_frame, test_code):
    """Build summary rows (train, test, mean_accuracy, std_accuracy) for one Lévy test condition."""
    accuracies = accuracy_frame['accuracy'].values * 100
    return pd.DataFrame({
        # NOTE(review): assumes the frame holds exactly one row per training
        # k = 1..8, in that order - confirm against the pickle contents.
        'train': [1, 2, 3, 4, 5, 6, 7, 8],
        'test': [test_code] * len(accuracies),
        'mean_accuracy': accuracies,
        'std_accuracy': [None] * len(accuracies),
    })


NLFw_onlevy2, NLFw_onlevy3 = split_NLFw_onlevy(datalevy)
NLFw_onlevyeq2, NLFw_onlevyeq3 = split_NLFw_onlevy(datalevyeq)

# Prepend the two Lévy test conditions to the per-k results. Test code 0 holds
# the rows from datalevyeq and test code -1 the rows from datalevy; the row
# order is: code 0 rows, code -1 rows, then the original frame.
combined_df2 = pd.concat(
    [_levy_rows(NLFw_onlevyeq2, 0), _levy_rows(NLFw_onlevy2, -1), df2],
    ignore_index=True,
)
combined_df3 = pd.concat(
    [_levy_rows(NLFw_onlevyeq3, 0), _levy_rows(NLFw_onlevy3, -1), df3],
    ignore_index=True,
)

In [None]:
# Create a 2x2 grid of subplots and flatten it once for simple indexing.
fig, axs = plt.subplots(2, 2, figsize=(20, 15))
axs = axs.flatten()

# One entry per panel: (data, legend label, shade color).
# Further frames (e.g. df, df2, df3, dfrnn) can be added here as extra panels.
datasets = [
    (combined_df2, 'NLF2', 'blue'),
    (combined_df3, 'NLF3', 'red'),
]

# Plot each dataset in its own subplot: shaded band only, no lines, markers or colorbar.
for panel, (data, label, color) in zip(axs, datasets):
    ax = plot_with_shaded_errors_all(
        data,
        ax=panel,
        title=None,
        label=label,
        shade_color=color,
        show_lines=False,
        show_markers=False,
        add_colorbar=False,
    )
    # Widen the y range beyond the limits set inside the plotting helper.
    ax.set_ylim(75, 100)

# Remove every subplot that received no dataset, not just the last one.
for unused_ax in axs[len(datasets):]:
    fig.delaxes(unused_ax)

# Adjust layout
plt.tight_layout()

# Add a main title for the entire figure
fig.suptitle('Comparison of Different Models', fontsize=16, y=1.02)

# Show the plot
plt.show()

In [None]:

# Add two rows to NLFw_levy[2], taken from the key ('levy', 2, 'Levy') of two
# metrics files. x becomes the row with test code -1 and y the row with test code 0.
# NOTE(review): NLFw_levy must already be defined when this cell runs; the
# corresponding load_fusion_accuracies call in the Load cell is commented out.
# NOTE(review): these files are read from '../', unlike the './NLFw/' files
# loaded earlier - confirm the location.
x = np.load('../NLFw_classifier_testmetrics_testedonlevy_trainedlevy.pkl', allow_pickle=True) # test code -1
y = np.load('../NLFw_classifier_testmetrics_testedonlevy_eq_trainedlevy.pkl', allow_pickle=True) # test code 0

# Reduce each loaded object to a single accuracy value, scaled by 100.
x = x[('levy', 2, 'Levy')]['accuracy'] * 100
y = y[('levy', 2, 'Levy')]['accuracy'] *  100

new_rows = pd.DataFrame({
    'train': ['levy', 'levy'],
    'test': [-1, 0],
    'mean_accuracy': [x, y],  # the two accuracy values computed above
    'std_accuracy': [None, None]
})

# Prepend the new rows to the existing DataFrame.
# Re-running this cell prepends the same two rows again.
NLFw_levy[2] = pd.concat([new_rows,  NLFw_levy[2]]).reset_index(drop=True)

In [None]:
# Add two rows to NLFw_levy[3], taken from the key ('levy', 3, 'Levy') of two
# metrics files. x becomes the row with test code -1 and y the row with test code 0.
# NOTE(review): NLFw_levy must already be defined when this cell runs; the
# corresponding load_fusion_accuracies call in the Load cell is commented out.
x = np.load('../NLFw_classifier_testmetrics_testedonlevy_trainedlevy.pkl', allow_pickle=True) # test code -1
y = np.load('../NLFw_classifier_testmetrics_testedonlevy_eq_trainedlevy.pkl', allow_pickle=True) # test code 0

# Reduce each loaded object to a single accuracy value, scaled by 100.
x = x[('levy', 3, 'Levy')]['accuracy'] * 100
y = y[('levy', 3, 'Levy')]['accuracy'] *  100

new_rows = pd.DataFrame({
    'train': ['levy', 'levy'],
    'test': [-1, 0],
    'mean_accuracy': [x, y],  # the two accuracy values computed above
    'std_accuracy': [None, None]
})

# Prepend the new rows to the existing DataFrame.
# Re-running this cell prepends the same two rows again.
NLFw_levy[3] = pd.concat([new_rows,  NLFw_levy[3]]).reset_index(drop=True)
# Bare expression: shows the updated frame as the notebook cell output.
NLFw_levy[3]

### Dev End

#### (Train - Test) with error bars

In [53]:
def plot_with_error_bars_diff(df, ax=None, title='Accuracy', label=None, color=None):
    """
    Plot mean accuracy against (train - test), one line per training value.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train';
            'train' and 'test' must support subtraction. An optional 'std_accuracy'
            column supplies the error bars. The frame is not modified.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is 'Accuracy'.
        label (str): Legend label used for every line. When empty, each line is
            labelled 'Train <value>'.
        color: Color used for every line. When empty, each training value gets its
            own color from the plasma colormap.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing from df.
    """
    for col in ('test', 'mean_accuracy', 'train'):
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    # Work on a copy: the coercions and the helper column below must not leak
    # into the caller's DataFrame.
    data = df.copy()
    data['mean_accuracy'] = pd.to_numeric(data['mean_accuracy'], errors='coerce')
    has_std = 'std_accuracy' in data.columns
    if has_std:
        data['std_accuracy'] = pd.to_numeric(data['std_accuracy'], errors='coerce')
    data['train_test_diff'] = data['train'] - data['test']

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    unique_trains = data['train'].unique()
    colors = plt.cm.plasma(np.linspace(0, 1, len(unique_trains)))
    color_map = dict(zip(unique_trains, colors))

    for train_value in unique_trains:
        # Sort by the difference so the connecting line runs left to right.
        subset = data[data['train'] == train_value].sort_values('train_test_diff')
        plot_color = color if color else color_map[train_value]

        # Line connecting the points; it carries the legend entry.
        ax.plot(subset['train_test_diff'], subset['mean_accuracy'],
                linestyle='-',
                color=plot_color,
                label=label if label else f'Train {train_value}')

        # Markers plus error bars (no bars when there is no std column).
        ax.errorbar(subset['train_test_diff'], subset['mean_accuracy'],
                    yerr=subset['std_accuracy'] if has_std else None,
                    fmt='o',
                    capsize=5,
                    capthick=1,
                    elinewidth=1,
                    color=plot_color)

    ax.set_ylim((76, 90))
    #ax.set_xlim(right=0)
    ax.set_xlabel('Train-Test Difference')
    ax.set_ylabel('Accuracy')
    ax.set_title(title)
    ax.legend(title='Train')
    ax.grid(True)
    return ax

#### (Train - Test) with shaded regions: Individual

In [54]:
def plot_with_shaded_errors_diff(df, ax=None, title='Accuracy', label=None, color=None, alpha=0.2, allow_legend=True):
    """
    Plot mean accuracy against (train - test) with a shaded +/- one std band per line.

    Args:
        df (pandas.DataFrame): Needs the columns 'test', 'mean_accuracy' and 'train';
            'train' and 'test' must support subtraction. An optional 'std_accuracy'
            column supplies the band. The frame is not modified.
        ax (matplotlib.axes.Axes): Axes to draw on. A new 12x8 figure is created when None.
        title (str): Axes title. Default is 'Accuracy'.
        label (str): Legend label used for every line. When empty, each line is
            labelled with its training value.
        color: Color used for every line. When empty, each training value gets its
            own color from the plasma colormap.
        alpha (float): Opacity of the shaded band. Default is 0.2.
        allow_legend (bool): Draw the legend. Default is True.

    Returns:
        matplotlib.axes.Axes: The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing from df.
    """
    for col in ('test', 'mean_accuracy', 'train'):
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    # Work on a copy: the coercions and the helper column below must not leak
    # into the caller's DataFrame.
    data = df.copy()
    data['mean_accuracy'] = pd.to_numeric(data['mean_accuracy'], errors='coerce')
    has_std = 'std_accuracy' in data.columns
    if has_std:
        data['std_accuracy'] = pd.to_numeric(data['std_accuracy'], errors='coerce')
    data['train_test_diff'] = data['train'] - data['test']

    if ax is None:
        _, ax = plt.subplots(figsize=(12, 8))

    unique_trains = data['train'].unique()
    colors = plt.cm.plasma(np.linspace(0, 1, len(unique_trains)))
    color_map = dict(zip(unique_trains, colors))

    for train_value in unique_trains:
        # Sort by the difference so the line and the band run left to right.
        subset = data[data['train'] == train_value].sort_values('train_test_diff')
        plot_color = color if color else color_map[train_value]

        ax.plot(subset['train_test_diff'], subset['mean_accuracy'],
                linestyle='-',
                marker='o',
                color=plot_color,
                label=label if label else f'{train_value}')

        # Shade mean +/- std only when at least one std value is present.
        if has_std and not subset['std_accuracy'].isna().all():
            ax.fill_between(subset['train_test_diff'],
                            subset['mean_accuracy'] - subset['std_accuracy'],
                            subset['mean_accuracy'] + subset['std_accuracy'],
                            alpha=alpha,
                            color=plot_color)

    ax.set_ylim((75, 90))
    ax.set_xlabel('Train-Test Difference')
    ax.set_ylabel('Mean Accuracy')
    ax.set_title(title)
    if allow_legend:
        ax.legend(title='Train', ncol=2)
    ax.grid(True)
    return ax

#### (Train - Test) with shaded regions: Grouped

In [55]:
def plot_with_shaded_errors_diff_all(df, ax=None, title='Accuracy', label=None, shade_color='blue', alpha=0.2, show_lines=False, show_markers=False, add_colorbar=True):
    """Shade the min-max envelope of mean accuracy against (train - test).

    For every value of ``train - test`` the lowest and highest
    ``mean_accuracy`` found in ``df`` bound a filled band. Optionally each
    training value is overlaid as its own line and/or markers, coloured by a
    viridis gradient over the ``train`` values.

    Args:
        df: DataFrame with ``train``, ``test`` and ``mean_accuracy`` columns.
            The caller's frame is left untouched; all derived columns are
            written to a private copy.
        ax: Axes to draw on. A new 12x8 figure is created when ``None``.
        title: Axes title.
        label: Legend label of the shaded band; a legend is only drawn when
            this is truthy.
        shade_color: Fill colour of the band.
        alpha: Opacity of the band.
        show_lines: Draw one line per training value.
        show_markers: Draw markers for every data point.
        add_colorbar: Attach a colourbar keyed to the training values.

    Returns:
        The axes that were drawn on.

    Raises:
        ValueError: If a required column is missing.
    """
    required_columns = ['test', 'mean_accuracy', 'train']
    for col in required_columns:
        if col not in df.columns:
            raise ValueError(f"Column '{col}' not found in DataFrame")

    # Work on a copy: callers routinely pass filtered slices, and writing new
    # columns into them would either warn or silently alter the caller's data.
    df = df.copy()
    df['mean_accuracy'] = pd.to_numeric(df['mean_accuracy'], errors='coerce')

    if ax is None:
        fig, ax = plt.subplots(figsize=(12, 8))

    df['train_test_diff'] = df['train'] - df['test']
    unique_trains = sorted(df['train'].unique())
    df_sorted = df.sort_values('train_test_diff')

    # The colour gradient is only needed when something is coloured by it.
    if show_lines or show_markers or add_colorbar:
        cmap = plt.cm.viridis
        norm = plt.Normalize(vmin=min(unique_trains), vmax=max(unique_trains))

    if show_lines or show_markers:
        for train_value in unique_trains:
            subset = df_sorted[df_sorted['train'] == train_value]
            line_color = cmap(norm(train_value))
            series_label = f'Train {train_value}'

            if show_lines:
                line_style = {'linestyle': '-', 'color': line_color, 'label': series_label}
                if show_markers:
                    line_style['marker'] = 'o'
                ax.plot(subset['train_test_diff'], subset['mean_accuracy'], **line_style)
            else:
                ax.scatter(subset['train_test_diff'], subset['mean_accuracy'],
                           color=line_color, label=series_label)

    # Lower and upper bound of the band, computed in a single grouping pass
    bounds = df_sorted.groupby('train_test_diff')['mean_accuracy'].agg(['min', 'max'])
    ax.fill_between(bounds.index, bounds['min'], bounds['max'],
                    alpha=alpha, color=shade_color, label=label)

    ax.set_ylim((75,90))
    ax.set_xlabel('Train-Test Difference')
    ax.set_ylabel('Mean Accuracy')
    ax.set_title(title)
    ax.grid(True)

    if label:
        ax.legend()

    if add_colorbar:
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = plt.colorbar(sm, ax=ax, label='Train Value')
        cbar.set_ticks(unique_trains)

    return ax

In [None]:
import matplotlib.pyplot as plt
import math

def plot_model_comparison(cases, train_values, figsize=(10, 6)):
    """Draw one train-test accuracy panel per model on a grid at most 2 wide.

    Args:
        cases: Sequence of ``(title, DataFrame)`` pairs; each DataFrame needs
            a ``train`` column and whatever ``plot_with_shaded_errors_diff``
            requires.
        train_values: Training values to keep; rows with any other ``train``
            value are dropped before plotting.
        figsize: Size of the whole figure.

    Raises:
        ValueError: If ``cases`` is empty.
    """
    n = len(cases)
    if n == 0:
        raise ValueError("cases must contain at least one (title, DataFrame) pair")

    cols = min(n, 2)      # never more than two columns
    rows = (n + 1) // 2   # 1 row for 1-2 panels, 2 rows for 3-4 panels, ...

    # squeeze=False keeps axs two-dimensional for every grid shape, so no
    # special-casing of the 1x1 and 1x2 layouts is needed.
    fig, axs = plt.subplots(rows, cols, figsize=figsize, squeeze=False)

    for i, (title, base) in enumerate(cases):
        # Copy the filtered slice: the plotting helper adds columns to it.
        df = base[base['train'].isin(train_values)].copy()
        ax = axs[divmod(i, 2)]
        plot_with_shaded_errors_diff(df, ax=ax, allow_legend=(i == 0))
        ax.set_title(title)
        ax.set_ylim(bottom=76)

    # An odd number of panels leaves the last grid slot empty; remove it.
    for i in range(n, rows * cols):
        fig.delaxes(axs[divmod(i, cols)])

    plt.tight_layout()
    #fig.suptitle('Performance Comparison Across Different Models', fontsize=16)
    plt.subplots_adjust(top=0.93)

    plt.show()

# Usage
# Each entry is a (panel title, DataFrame) pair consumed by
# plot_model_comparison; only the RNN case is currently active.
cases = [
    #('LF', LF_nonlevy['LF']),
    #('NLF', NLF_nonlevy['NLF_1']),
    #('NLF 2', NLFw_nonlevy[2]),
    #('NLF 3', NLFw_nonlevy[3]),
    ('RNN', RNN_nonlevy)
]



# To plot only specific cases:
# cases_to_plot = [cases[0], cases[2], cases[4]]
# Training value 1 is left out of this comparison.
train_values=[2,3,4,5,6,7,8]
plot_model_comparison(cases, train_values=train_values)

In [None]:
# Overview figure: one min-max accuracy envelope per model on a 2x2 grid
train_values = list(range(1, 9))
df2, df3 = (NLFw_nonlevy[w][NLFw_nonlevy[w]['train'].isin(train_values)] for w in (2, 3))
dfrnn = RNN_nonlevy[RNN_nonlevy['train'].isin(train_values)]

fig, axs = plt.subplots(2, 2, figsize=(20, 16))
axs = axs.flatten()  # 1-D view of the grid so panels can be paired with datasets

# (data, legend label, band colour) for each panel, in plotting order
datasets = [
    (LF_nonlevy['LF'], 'LF', 'green'),
    (df2, 'NLF2', 'blue'),
    (df3, 'NLF3', 'red'),
    (dfrnn, 'RNN', 'purple'),
]

for panel, (data, label, color) in zip(axs, datasets):
    # Band only: per-train lines, markers and the colourbar are all disabled
    ax = plot_with_shaded_errors_diff_all(
        data,
        ax=panel,
        title=None,
        label=label,
        shade_color=color,
        show_lines=False,
        show_markers=False,
        add_colorbar=False,
    )
    ax.set_ylim(75, 90)

plt.tight_layout()

# Figure-level title, placed slightly above the top edge
fig.suptitle('Comparison of Different Models', fontsize=16, y=1.02)

plt.show()

In [None]:
# Overview figure: per-model accuracy band with the matching Levy results
# overlaid as error bars, on a 3x2 grid (last slot unused).
train_values = list(range(1, 9))
df = NLF_nonlevy['NLF_1'][NLF_nonlevy['NLF_1']['train'].isin(train_values)]
df2, df3 = (NLFw_nonlevy[w][NLFw_nonlevy[w]['train'].isin(train_values)] for w in (2, 3))
dfrnn = RNN_nonlevy[RNN_nonlevy['train'].isin(train_values)]

fig, axs = plt.subplots(3, 2, figsize=(20, 15))
axs = axs.flatten()  # 1-D view of the grid so panels can be paired with datasets

# (data, legend label, band colour, Levy data) for each panel
datasets = [
    (LF_nonlevy['LF'], 'LF', 'green', LF_levy['LF']),
    (df, 'NLF', 'grey', NLF_levy['NLF_1']),
    (df2, 'NLF2', 'blue', NLFw_levy[2]),
    (df3, 'NLF3', 'red', NLFw_levy[3]),
    (dfrnn, 'RNN', 'purple', RNN_levy),
]

for panel, (data, label, color, levy) in zip(axs, datasets):
    # The three flags are passed as 0 (falsy): no lines, markers or colourbar
    ax = plot_with_shaded_errors_all(
        data,
        ax=panel,
        title=None,
        label=label,
        shade_color=color,
        show_lines=0,
        show_markers=0,
        add_colorbar=0,
    )
    plot_with_error_bars(levy, ax=ax, title=None)
    ax.set_ylim(75, 90)

# Five datasets on six slots: drop the empty trailing panel
fig.delaxes(axs[-1])
plt.tight_layout()

# Figure-level title, placed slightly above the top edge
fig.suptitle('Comparison of Different Models', fontsize=16, y=1.02)

plt.show()

In [None]:
# Figure 2D1: error-bar plot of the non-Levy RNN results.
#ax = plot_with_error_bars(RNN_levy, label='Levy', color='grey')
#plot_with_error_bars(RNN_levy_eqldistr, ax=ax, label='Levy equal dist k', color='salmon')
# Plotting the second dataframe on the same figure
plot_with_error_bars(RNN_nonlevy,  title='$RNN$')
# Same y-range as the other Fig2*1 panels saved in this notebook
plt.ylim(75,90)
#plt.grid()
plt.savefig('Fig2D1.svg', dpi=300)

In [None]:
# Overlay the Levy results of the RNN and of NLF with window 3 on one axes.
# The value returned by the first call is reused as the target of the second.
aax = plot_with_error_bars(RNN_levy, label='RNN Levy', color='salmon')
#plot_with_error_bars(RNN_nonlevy[RNN_nonlevy['train']==3], label='RNN train 3', color='red')
#
plot_with_error_bars(NLFw_levy[3], ax=aax, label='NLF3 Levy', color='grey')
# Further overlays kept for reference (disabled):
#plot_with_error_bars(NLFw_levy[2], ax=aax, label='NLF2 Levy', color='steelblue')
#plot_with_error_bars(NLFw_levy_eqldistr[2], ax=aax, label='NLF2 Levy eq', color='b')
#plot_with_error_bars(NLFw_levy_eqldistr[3], ax=aax, label='NLF3 Levy eq', color='y')


#plot_with_error_bars(NLFw_nonlevy[3][NLFw_nonlevy[3]['train']==3], ax=aax, label='NLF3 train 3', color='black')
#plot_with_error_bars(NLFw_nonlevy[2][NLFw_nonlevy[2]['train']==3], ax=aax, label='NLF2 train 3', color='green')
##plt.ylim(82,85)


In [None]:
# Figure 2C1: error-bar plot of the non-Levy NLF results for window size 3.
ws = 3
axx = plot_with_error_bars(NLFw_nonlevy[ws],title='$NLF_3$')
# Same y-range as the other Fig2*1 panels saved in this notebook
plt.ylim(75,90)
#plt.grid()
plt.savefig('Fig2C1.svg', dpi=300)
# Optional Levy overlays on the same axes (disabled):
#plot_with_error_bars(NLFw_levy_eqldistr[ws], ax=axx, label='Levy equal dist k', color='salmon')
#plot_with_error_bars(NLFw_levy[ws], ax=axx, title=f'NLF {ws}', color='grey')

In [None]:
# Figure 2B1: error-bar plot of the non-Levy NLF results for window size 2.
ws = 2
axx = plot_with_error_bars(NLFw_nonlevy[ws], title='$NLF_2$')
# Same y-range as the other Fig2*1 panels saved in this notebook
plt.ylim(75,90)
#plt.grid()
plt.savefig('Fig2B1.svg', dpi=300)
# Optional Levy overlays on the same axes (disabled):
#plot_with_error_bars(NLFw_levy_eqldistr[ws], ax=axx, label='Levy equal dist k', color='salmon')
#plot_with_error_bars(NLFw_levy[ws], ax=axx, title=f'NLF {ws}', color='grey')

In [None]:
# NLF_1: non-Levy results with both Levy variants overlaid on the same axes.
# Here ws is the dictionary key, not a numeric window size.
ws = 'NLF_1'
axx = plot_with_error_bars(NLF_nonlevy[ws])
plot_with_error_bars(NLF_levy_eqldistr[ws], ax=axx, label='Levy equal dist k', color='salmon')
# The last call also sets the axes title to the key name.
plot_with_error_bars(NLF_levy[ws], ax=axx, title=f'{ws}', color='grey')

In [None]:
# LF: non-Levy results with both Levy variants overlaid on the same axes.
# Here ws is the dictionary key, not a numeric window size.
ws = 'LF'
axx = plot_with_error_bars(LF_nonlevy[ws])
plot_with_error_bars(LF_levy_eqldistr[ws], ax=axx, label='Levy equal dist k', color='salmon')
# The last call also sets the axes title to the key name.
plot_with_error_bars(LF_levy[ws], ax=axx, title=f'{ws}', color='grey')

#### RNN std

In [None]:
import pandas as pd

# Only (train, test) pairs whose accuracy std exceeds this value are kept
threshold = 0.6

# Columns retained for inspection, in display order
columns_to_show = ['train', 'test', 'difference', 'mean_accuracy', 'std_accuracy']

# Filter the noisy rows, attach train - test, and rank by descending std
result = (
    RNN_nonlevy[RNN_nonlevy['std_accuracy'] > threshold]
    .assign(difference=lambda frame: frame['train'] - frame['test'])
    .sort_values('std_accuracy', ascending=False)
)
result = pd.DataFrame(result[columns_to_show])


# Does high variance cluster at particular train - test offsets?
plt.scatter(result['std_accuracy'], result['difference'])

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import LinearSegmentedColormap


def generate_matrices(df, n=8):
    """Arrange per-(train, test) accuracy statistics as square matrices.

    Args:
        df: DataFrame with ``train``, ``test``, ``mean_accuracy`` and
            ``std_accuracy`` columns, where ``train`` and ``test`` take the
            values ``1..n``.
        n: Number of train/test conditions (matrix side length).

    Returns:
        Tuple ``(M, E)`` of ``n x n`` arrays; ``M[i, j]`` is the mean accuracy
        and ``E[i, j]`` the accuracy std for ``train == i + 1`` and
        ``test == j + 1``. If a pair occurs more than once, its first row is
        used.

    Raises:
        IndexError: If a (train, test) pair has no row in ``df``.
    """
    M = np.zeros((n, n))
    E = np.zeros((n, n))
    for i in range(n):
        # Filter on the training value once per row of the matrix
        train_rows = df[df['train'] == i + 1]
        for j in range(n):
            cell = train_rows[train_rows['test'] == j + 1]
            if cell.empty:
                raise IndexError(f"No row with train={i + 1} and test={j + 1}")
            M[i, j] = cell['mean_accuracy'].values[0]
            E[i, j] = cell['std_accuracy'].values[0]
    return M, E

def norm(M, E=None, title='Alg', do_return=False):
    """Plot each training row's accuracy gap to the per-test best.

    For every training value (row of ``M``) the curve shows
    ``M[train] - max over training values`` for each test value, against
    ``train - test``. A new 10x6 figure is created on every call.

    Args:
        M: 2-D array of mean accuracies, rows = train, columns = test.
        E: Optional array of the same shape; when given, a band of
            ``+/- E`` is shaded around every curve. ``None`` draws the
            curves only.
        title: Figure title. The legend is only drawn for the title
            ``'$NLF_2$'``.
        do_return: When true, return ``(M, E)``.

    Returns:
        ``(M, E)`` if ``do_return`` is true, otherwise ``None``.
    """
    M = np.asarray(M)
    n_train, n_test = M.shape

    # One plasma colour per training value
    colors = plt.cm.plasma(np.linspace(0, 1, n_train))
    plt.figure(figsize=(10,6))

    # Best accuracy reached on each test value, over all training values
    best_per_test = M.max(axis=0)
    test = np.arange(1, n_test + 1)

    for train in range(1, n_train + 1):
        x = train - test
        y = M[train-1, :] - best_per_test

        plt.plot(x, y, 'o-', label=f'{train}', color=colors[train-1])

        # The shaded error band needs E; callers without std data pass none
        if E is not None:
            error = E[train-1, :]
            plt.fill_between(x, y-error, y+error, alpha=0.2, color=colors[train-1])

    # NOTE: the legend is tied to this specific title, so only that panel gets one
    if title == '$NLF_2$':
        plt.legend(title = 'Train', ncol=2)
    plt.xlabel('Train - Test')
    # Raw string: '\D' is not a valid escape sequence
    plt.ylabel(r'$\Delta$ Accuracy')
    # Dashed marker at train == test
    plt.vlines(0, -8, 1, color='grey', linestyle='--', linewidth=3)
    plt.title(f'{title}')
    #plt.grid()
    plt.ylim(-8,1)
    if do_return:
        return M, E

In [None]:
# Figure 2C2: accuracy gap to the per-test best for NLF with window size 3.
# Only the mean matrix is used; no error matrix is passed to norm, so E keeps
# its default of None.
M, _ = generate_matrices(NLFw_nonlevy[3])
norm(M, title='$NLF_3$')
plt.savefig('Fig2C2.svg', dpi=300)
#plt.show()

In [None]:
# Figure 2B2: accuracy gap to the per-test best for NLF with window size 2.
# Only the mean matrix is used; no error matrix is passed to norm, so E keeps
# its default of None.
M, _ = generate_matrices(NLFw_nonlevy[2])
norm(M, title='$NLF_2$')
plt.savefig('Fig2B2.svg', dpi=300)
plt.show()

# The string literal below is disabled code kept for reference; it is never
# executed.
"""
M, _ = generate_matrices(NLFw_nonlevy[3])
norm(M, 'NLF 3')
plt.show()

M, _ = generate_matrices(RNN_nonlevy)
norm(M, 'RNN')
plt.show()

norm(NLF_nonlevy['NLF_1'], 'NLF 2')
plt.show()

norm(LF_nonlevy['LF'], 'NLF 2')
plt.show()
"""

In [None]:
# Figure 2D2: accuracy gap to the per-test best for the RNN. This is the only
# Fig2*2 call that passes the std matrix E to norm.
M, E = generate_matrices(RNN_nonlevy)
norm(M, E, title='$RNN$')
plt.savefig('Fig2D2.svg', dpi=300)
plt.show()

In [None]:

# RNN mean accuracies, and their gap to the best value in each column
# (N is <= 0 everywhere and 0 at each column's maximum).
M, _ = generate_matrices(RNN_nonlevy)
N = M - M.max(axis=0)


### Overall winner

In [None]:
# Bare expression: in a notebook the returned (M, E) pair is shown as cell output.
generate_matrices(RNN_nonlevy)

In [75]:
def generate_matrices(df, n=8):
    """Arrange per-(train, test) accuracy statistics as square matrices.

    Args:
        df: DataFrame with ``train``, ``test``, ``mean_accuracy`` and
            ``std_accuracy`` columns, where ``train`` and ``test`` take the
            values ``1..n``.
        n: Number of train/test conditions (matrix side length).

    Returns:
        Tuple ``(M, E)`` of ``n x n`` arrays; ``M[i, j]`` is the mean accuracy
        and ``E[i, j]`` the accuracy std for ``train == i + 1`` and
        ``test == j + 1``. If a pair occurs more than once, its first row is
        used.

    Raises:
        IndexError: If a (train, test) pair has no row in ``df``.
    """
    M = np.zeros((n, n))
    E = np.zeros((n, n))
    for i in range(n):
        # Filter on the training value once per row of the matrix
        train_rows = df[df['train'] == i + 1]
        for j in range(n):
            cell = train_rows[train_rows['test'] == j + 1]
            if cell.empty:
                raise IndexError(f"No row with train={i + 1} and test={j + 1}")
            M[i, j] = cell['mean_accuracy'].values[0]
            E[i, j] = cell['std_accuracy'].values[0]
    return M, E

In [76]:
# Load the RNN results that involve the Levy / Levy-equal conditions.
# Each file holds a single pickled object, unwrapped with .item(); the objects
# are later passed to sortmean, which iterates them with .items().
# NOTE(security): allow_pickle=True unpickles arbitrary objects, so only load
# files from a trusted source.
RNN_levytrained = np.load('./RNN_levytrained_alltest.npy', allow_pickle=True).item()
RNN_levyeqtrained = np.load('./RNN_levyeqtrained_alltest.npy', allow_pickle=True).item()
RNN_ktrained_levytested = np.load('./RNN_testonlevy_trainonk.npy', allow_pickle=True).item()
RNN_ktrained_levyeqtested = np.load('./RNN_testonlevyeq_trainonk.npy', allow_pickle=True).item()

In [77]:
def sortmean(data):
    """Return ``{key: mean of data[key]}`` with the keys in sorted order."""
    as_arrays = {}
    for key, values in data.items():
        # Non-array values are wrapped as arrays before averaging
        as_arrays[key] = values if isinstance(values, np.ndarray) else np.array(values)

    means_by_key = {}
    for key in sorted(as_arrays):
        means_by_key[key] = np.mean(as_arrays[key])
    return means_by_key

In [78]:
# Collapse each loaded result to one mean per key, keys in sorted order.
# The names are rebound in place, so the raw per-key values are no longer
# available under these names after this cell has run.
RNN_levytrained = sortmean(RNN_levytrained)
RNN_levyeqtrained = sortmean(RNN_levyeqtrained)
RNN_ktrained_levytested = sortmean(RNN_ktrained_levytested)
RNN_ktrained_levyeqtested = sortmean(RNN_ktrained_levyeqtested)

In [79]:
# Assemble the 10x10 RNN accuracy matrix (rows = train, columns = test):
#   rows 0-1        : trained on Levy / Levy-equal
#   cols 0-1        : tested on Levy / Levy-equal (rows 2-9 = trained on k)
#   block [2:, 2:]  : 8x8 matrix of the non-Levy train/test combinations
# NOTE(review): rows 0 and 1 need exactly 10 values each, presumably ordered
# like the columns -- confirm against the key order of the loaded files.
X = np.zeros([10,10])
X[2:,2:], _ = generate_matrices(RNN_nonlevy)
X[0] = list(RNN_levytrained.values())
X[1] = list(RNN_levyeqtrained.values())
X[2:10,0] = list(RNN_ktrained_levytested.values())
X[2:10,1] = list(RNN_ktrained_levyeqtested.values())
RNN_all = X

In [80]:
# NLF (window 2) results for the Levy / Levy-equal conditions
a = np.load('./NLF2_levytrained_alltest.npy', allow_pickle=True)
b = np.load('./NLF2_levyeqtrained_alltest.npy', allow_pickle=True)

# The k-trained file is read once; column 2 of rows 0-7 and of rows 8-15 is taken
_nlf2_ktrained = np.load('./NLF2_ktrained_alltest.npy', allow_pickle=True)
c = _nlf2_ktrained[:8][:, 2]  # Levy
d = _nlf2_ktrained[8:16][:, 2]  # Levy flat

In [81]:
# Assemble the 10x10 accuracy matrix for NLF with window size 2, using the same
# layout as the RNN matrix: rows 0-1 from a and b, columns 0-1 of rows 2-9
# from c and d, and the 8x8 non-Levy block in [2:, 2:].
# NOTE(review): a and b must each provide 10 values -- confirm against the files.
Y = np.zeros([10,10])
Y[2:,2:], _ = generate_matrices(NLFw_nonlevy[2])
Y[0] = a
Y[1] = b
Y[2:10,0] = c
Y[2:10,1] = d
NLF2 = Y

In [82]:
# NLF (window 3) results for the Levy / Levy-equal conditions
a = np.load('./NLF3_levytrained_alltest.npy', allow_pickle=True)
b = np.load('./NLF3_levyeqtrained_alltest.npy', allow_pickle=True)

# The k-trained file is read once; column 2 of rows 0-7 and of rows 8-15 is taken
_nlf3_ktrained = np.load('./NLF3_ktrained_alltest.npy', allow_pickle=True)
c = _nlf3_ktrained[:8][:, 2]  # Levy
d = _nlf3_ktrained[8:16][:, 2]  # Levy flat

In [83]:
# Assemble the 10x10 accuracy matrix for NLF with window size 3, using the same
# layout as the RNN matrix: rows 0-1 from a and b, columns 0-1 of rows 2-9
# from c and d, and the 8x8 non-Levy block in [2:, 2:].
# NOTE(review): a and b must each provide 10 values -- confirm against the files.
Z = np.zeros([10,10])
Z[2:,2:], _ = generate_matrices(NLFw_nonlevy[3])
Z[0] = a
Z[1] = b
Z[2:10,0] = c
Z[2:10,1] = d
NLF3 = Z

In [84]:
# 8x8 mean-accuracy matrices for LF and NLF_1. N0 and N1 are plain aliases of
# M0 and M1: the per-column normalisation is commented out.
M0, _ = generate_matrices(LF_nonlevy['LF'])
N0 = M0 #- M2.max(axis=0)

M1, _ = generate_matrices(NLF_nonlevy['NLF_1'])
N1 = M1 #- M2.max(axis=0)

In [85]:
# 8x8 mean-accuracy matrices for NLF (window 2 and 3) and the RNN. The
# normalised variants (W2, N3, Nr) are commented out here, so this cell does
# not define them.
M2, _ = generate_matrices(NLFw_nonlevy[2])
#W2 = M2 - M2.min(axis=0)

M3, _ = generate_matrices(NLFw_nonlevy[3])
#N3 = M3# - M3.max(axis=0)

Mr, _ = generate_matrices(RNN_nonlevy)
#Nr = Mr# - Mr.max(axis=0)

In [393]:
# Accuracy range over the three NLF matrices.
# NOTE(review): Mr (RNN) is not part of this range although it is later
# shifted by min_acc as well -- confirm this is intended.
min_acc = min(M1.min(), M2.min(), M3.min())
max_acc = max(M1.max(), M2.max(), M3.max())

In [395]:
# Shift every matrix by the same offset (min_acc), so the values stay
# comparable across models.
W2 = M2 - min_acc
W3 = M3 - min_acc
Wr = Mr - min_acc

In [None]:
# NOTE(review): in the cells above, N3 and Nr are only assigned in
# commented-out lines, so this cell raises NameError unless they are defined
# elsewhere -- confirm which matrices were intended here.
accuracy_matrices = [N1, N3, Nr]
plot_algorithm_performance(accuracy_matrices)

In [41]:
# Offset of every NLF (window 3) accuracy from the matrix-wide minimum.
M, _ = generate_matrices(NLFw_nonlevy[3])
weight = M - M.min()
# Unfinished saturation formula, left disabled:
#sat = weight /  # saturation

In [87]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.patches import Circle, Wedge
import colorsys

def get_global_min_max(accuracy_matrices):
    """Return ``(smallest, largest)`` accuracy found in any of the matrices."""
    flattened = []
    for matrix in accuracy_matrices:
        for row in matrix:
            flattened.extend(row)
    lowest = min(flattened)
    highest = max(flattened)
    return lowest, highest

def calculate_radius_and_saturations(accuracies, global_min, global_max):
    """
    Calculate wedge radii and colour saturations for one group of accuracies.

    Radius formula: (Acc(alg, tr, te) - global_min) / (global_max - global_min),
    clipped from below at 0.2 so that every wedge stays visible.

    Saturation formula: (acc - min(accuracies)) / sum of those offsets, i.e.
    the offsets from the local minimum normalised to sum to 1. When all
    accuracies are equal, every entry gets the equal share 1 / len(accuracies).

    Args:
        accuracies: Accuracies of the compared algorithms for one scenario.
        global_min: Smallest accuracy over all matrices.
        global_max: Largest accuracy over all matrices.

    Returns:
        Tuple ``(radii, saturations)`` of lists with one entry per accuracy;
        two empty lists for empty input.
    """
    count = len(accuracies)
    if count == 0:
        return [], []

    # Share given to every algorithm when the values cannot be told apart
    equal_share = 1 / count

    # Degenerate global range: full radius, equal saturations
    if global_max == global_min:
        return [1] * count, [equal_share] * count

    # Global normalisation, with a floor so the smallest wedge stays visible
    min_radius = 0.2
    span = global_max - global_min
    radii = [max((acc - global_min) / span, min_radius) for acc in accuracies]

    # Local normalisation for visual distinction within the group
    min_acc = min(accuracies)
    weights = [acc - min_acc for acc in accuracies]
    weight_sum = sum(weights)

    if weight_sum == 0:
        saturations = [equal_share] * count
    else:
        saturations = [weight / weight_sum for weight in weights]

    return radii, saturations

def adjust_color_saturation(base_color, saturation):
    """Return ``base_color`` with its HSV saturation replaced.

    Args:
        base_color: Hex colour (``'#RRGGBB'`` or ``'RRGGBB'``) or any other
            colour specification understood by Matplotlib, such as
            ``'goldenrod'`` or ``'xkcd:sage green'``.
        saturation: Value in [0, 1]; mapped linearly onto the saturation
            range [0.1, 1] so that even the weakest colour keeps some tint.

    Returns:
        The adjusted colour as a lowercase ``'#rrggbb'`` string. Hue and
        value of ``base_color`` are kept.
    """
    hex_digits = base_color.lstrip('#') if isinstance(base_color, str) else ''
    is_hex = len(hex_digits) >= 6 and all(ch in '0123456789abcdefABCDEF' for ch in hex_digits)
    if is_hex:
        rgb = tuple(int(hex_digits[i:i+2], 16)/255 for i in (0, 2, 4))
    else:
        # Named colours cannot be parsed as hex; let Matplotlib resolve them
        from matplotlib.colors import to_rgb
        rgb = to_rgb(base_color)

    hsv = colorsys.rgb_to_hsv(*rgb)
    min_saturation = 0.1
    max_saturation = 1
    adjusted_saturation = min_saturation + (max_saturation - min_saturation) * saturation
    new_hsv = (hsv[0], adjusted_saturation, hsv[2])
    rgb = colorsys.hsv_to_rgb(*new_hsv)
    return '#{:02x}{:02x}{:02x}'.format(int(rgb[0]*255), int(rgb[1]*255), int(rgb[2]*255))



In [None]:
def plot_algorithm_performance(accuracy_matrices, show_values=False, value_format='.2f', 
                             value_size=8, value_color='grey', scenario_names=None):
    """
    Plot algorithm performance comparison with globally normalized radii.

    One small axes is drawn per (train, test) scenario. Inside it, every
    algorithm gets an equal-angle wedge whose radius encodes its accuracy
    relative to the global range and whose colour saturation encodes its
    standing among the algorithms for that scenario.

    Args:
        accuracy_matrices: One square matrix per algorithm (rows = train,
            columns = test), all of the same size. At most three matrices,
            in the order NLF 2, NLF 3, RNN.
        show_values: Write the accuracy value into every wedge.
        value_format: Format spec used for those values.
        value_size: Font size of those values.
        value_color: Text colour of those values.
        scenario_names: Row/column labels, one per scenario. Defaults to
            'Lévy', 'Uniform', '1', ..., '8'; extra names are ignored.

    Returns:
        Tuple ``(fig, axs)``; ``axs`` is always a 2-D array.

    Raises:
        ValueError: If there are more matrices than colours, or fewer
            scenario names than scenarios.
    """
    n_scenarios = len(accuracy_matrices[0])
    n_algorithms = len(accuracy_matrices)

    #base_colors = ['#FFB6C1', '#ADD8E6', '#DDA0DD']  # Pastel Pink, Pastel Blue, Pastel Violet
    base_colors = [color_NLF2, color_NLF3, color_RNN] #['#DAA520', '#b5c9b5', '#9b7aa1'] 
    algorithm_names = ['NLF 2', 'NLF 3', 'RNN']
    if n_algorithms > len(base_colors):
        raise ValueError(
            f"At most {len(base_colors)} accuracy matrices are supported, got {n_algorithms}")

    # The same names label the columns (test) and the rows (train)
    if scenario_names is None:
        scenario_names = ['Lévy', 'Uniform'] + [f'{i}' for i in range(1, 9)]
    if len(scenario_names) < n_scenarios:
        raise ValueError(
            f"Need {n_scenarios} scenario names, got {len(scenario_names)}")

    # squeeze=False keeps axs two-dimensional even for a single scenario
    fig, axs = plt.subplots(n_scenarios, n_scenarios,
                            figsize=(1.*n_scenarios, 1.*n_scenarios), squeeze=False)

    # Get global min and max for normalization
    global_min, global_max = get_global_min_max(accuracy_matrices)

    # Every algorithm gets an equal share of the circle
    wedge_span = 360 / n_algorithms

    for i in range(n_scenarios):
        for j in range(n_scenarios):
            ax = axs[i, j]
            accuracies = [matrix[i][j] for matrix in accuracy_matrices]
            radii, saturations = calculate_radius_and_saturations(accuracies, global_min, global_max)

            # Scale up the radii
            radii = [r * 1.5 for r in radii]  # Increase radius by 50%

            start_angle = 0
            for k, (accuracy, radius, saturation) in enumerate(zip(accuracies, radii, saturations)):
                end_angle = start_angle + wedge_span
                color = adjust_color_saturation(base_colors[k], saturation)

                wedge = Wedge((0, 0), radius, start_angle, end_angle, fc=color, ec='none')
                ax.add_artist(wedge)

                if show_values:
                    # Label at half the radius, along the wedge's bisector
                    angle_rad = np.radians(start_angle + (end_angle - start_angle)/2)
                    text_radius = radius * 0.5
                    x = text_radius * np.cos(angle_rad)
                    y = text_radius * np.sin(angle_rad)

                    value_text = f"{accuracy:{value_format}}"
                    ax.text(x, y, value_text, ha='center', va='center', 
                           fontsize=value_size, color=value_color,
                           rotation=np.degrees(angle_rad) % 360 - 90)

                start_angle = end_angle

            # Limits leave room for the largest scaled radius (1.5)
            ax.set_xlim(-1.65, 1.65)
            ax.set_ylim(-1.65, 1.65)
            ax.axis('off')

    # Set column titles (test scenarios)
    for j in range(n_scenarios):
        axs[0, j].set_title(scenario_names[j], fontsize=15, color='black')

    # Set row labels (train scenarios)
    for i in range(n_scenarios):
        axs[i, 0].text(-2.25, 0, scenario_names[i], rotation=90, va='center', ha='right', fontsize=15, color='black')

    # Axis captions for the whole grid: 'Test' above, 'Train' on the left
    fig.text(0.5, 1, 'Test', ha='center', va='center', fontsize=20)
    fig.text(-0.0, 0.5, 'Train', va='center', ha='center', rotation=90, fontsize=20)

    legend_elements = [plt.Rectangle((0, 0), 1, 1, fc=color, ec='none') for color in base_colors]
    fig.legend(legend_elements, algorithm_names, loc='upper right', bbox_to_anchor=(0.97, 1.01), 
              fontsize=10, frameon=False, ncol=3)

    plt.tight_layout()
    return fig, axs

# Figure 4: wedge comparison of NLF 2 (Y), NLF 3 (Z) and the RNN (X) on the
# full 10x10 scenario grid. Value labels are switched off (show_values=0), so
# value_color has no visible effect.
accuracy_matrices = [Y, Z, X]
# Alternative inputs, not used by the call below: the 8x8 non-Levy blocks
reduced = [Y[2:,2:], Z[2:,2:], X[2:,2:]]
original = [M2, M3, Mr]
plot_algorithm_performance(accuracy_matrices, show_values=0, value_color='white')
#plt.title('Relative saturations: normalised weights')
plt.savefig('Fig4.svg', dpi=300, bbox_inches='tight', pad_inches=0.2)