In [None]:
# Show the kernel's current working directory
%pwd

In [None]:
# Move the working directory up two levels
# (presumably to the project root that the relative './...' paths below expect -- confirm)
%cd ../..

In [None]:
# List the contents of the current working directory
%ls

In [None]:
import glob as glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Utils

In [None]:
# Lookup table used for renaming: full benchmark problem name -> short label
problem_name_mapping = dict(
    [
        ('ackley3', 'Ak3'),
        ('alpine1', 'Al1'),
        ('bird', 'Bd'),
        ('bohachevsky2', 'By2'),
        ('bohachevsky3', 'By3'),
        ('bukin_n2', 'Bk2'),
        ('bukin_n4', 'Bk4'),
        ('camel_3hump', 'C3h'),
        ('chung_reynolds', 'CR'),
        ('cross_leg_table', 'CLT'),
        ('eggholder', 'Eh'),
        ('griewank', 'Gw'),
        ('holder_table', 'HTable'),
        ('levy', 'Ly'),
        ('levy_n13', 'Ly13'),
        ('schaffer_n2', 'Sc2'),
        ('schaffer_n4', 'Sc4'),
        ('schwefel', 'Swl'),
        ('shubert', 'Sbt'),
        ('xinsheyang_n3', 'XSY3'),
    ]
)


def build_dl_results(files, tolerance=1e-4):
    """
    Build the per-problem deeplifting results from parquet files.

    Each run counts as a hit when its objective value ``f`` is within
    ``tolerance`` of ``global_minimum``. Hits are averaged per network
    configuration, and the top-ranked configuration is kept for every problem.

    Parameters
    ----------
    files : path or list of paths
        Forwarded unchanged to ``pd.read_parquet``. The loaded data must
        contain 'global_minimum', 'f' and every column in ``groupby_columns``.
    tolerance : float, default 1e-4
        Largest absolute gap between ``f`` and ``global_minimum`` that still
        counts as a hit.

    Returns
    -------
    pd.DataFrame
        One row per 'problem_name' holding the configuration columns of the
        top-ranked configuration and its hit rate in 'success rate'.
    """
    # Read from the argument, not from a notebook-level global of a similar
    # name, so the result always reflects the files the caller asked for.
    deeplifting_df = pd.read_parquet(files)

    # We need to create the hits column
    deeplifting_df['hits'] = (
        np.abs(deeplifting_df['global_minimum'] - deeplifting_df['f']) <= tolerance
    )

    # Columns that together identify one configuration on one problem
    groupby_columns = [
        'problem_name',
        'algorithm',
        'input_size',
        'hidden_size',
        'hidden_activation',
        'output_activation',
        'agg_function',
        'include_bn',
    ]

    # Hit rate per configuration, best configuration first within each problem
    deeplifting_results_df = (
        deeplifting_df.groupby(groupby_columns)
        .agg({'hits': 'mean'})
        .reset_index()
        .sort_values(['problem_name', 'hits'], ascending=[True, False])
        .reset_index(drop=True)
    )

    # Keep the leading (highest hit rate) row of every problem.
    # NOTE: GroupBy.first() picks the first non-null value per column.
    deeplifting_results_df = deeplifting_results_df.groupby(['problem_name']).first()
    deeplifting_results_df = deeplifting_results_df.reset_index().rename(
        columns={'hits': 'success rate'}
    )

    return deeplifting_results_df

# Load in the algorithm comparison results

In [None]:
# Glob pattern: every entry two levels below the low-dimensional comparison directory
algorithm_file_directory = './algorithm_compare_results/low-dimensional/*/*'
algorithm_files = glob.glob(pathname=algorithm_file_directory)

# Hand the whole list of matched paths to a single read_parquet call
algorithm_df = pd.read_parquet(path=algorithm_files)
algorithm_df.head(n=5)

In [None]:
# Success rate of every algorithm on every problem = mean of its 'hits' column
algorithm_results_df = (
    algorithm_df.groupby(['problem_name', 'algorithm'])['hits']
    .mean()
    .rename('success rate')
    .reset_index()
)
algorithm_results_df.head()

# Generate the deeplifting results

In [None]:
deeplifting_file_directory = './data-queue/low-dimensional-deeplifting/*/*'
deeplifting_files = glob.glob(pathname=deeplifting_file_directory)

# Load every matched file and flag a run as a hit when its objective value
# lies within 1e-4 of the known global minimum
deeplifting_df = pd.read_parquet(path=deeplifting_files)
deeplifting_df = deeplifting_df.assign(
    hits=deeplifting_df['global_minimum'].sub(deeplifting_df['f']).abs().le(1e-4)
)
deeplifting_df.head()

In [None]:
# Columns that together identify one network configuration on one problem
groupby_columns = [
    'problem_name',
    'algorithm',
    'input_size',
    'hidden_size',
    'hidden_activation',
    'output_activation',
    'agg_function',
    'include_bn',
]

# Columns kept at the end so the table can be combined with other results
columns = ['problem_name', 'algorithm', 'success rate']

# One pipeline: hit rate per configuration -> rank configurations inside each
# problem (highest hit rate first) -> keep the leading row per problem ->
# rename the hit rate and keep only the final columns
deeplifting_results_df = (
    deeplifting_df.groupby(groupby_columns)['hits']
    .mean()
    .reset_index()
    .sort_values(by=['problem_name', 'hits'], ascending=[True, False])
    .reset_index(drop=True)
    .groupby(['problem_name'])
    .first()
    .reset_index()
    .rename(columns={'hits': 'success rate'})
    .loc[:, columns]
)
deeplifting_results_df

In [None]:
# TODO: the eggholder, griewank and schwefel problems still need work

In [None]:
# Combine all results into one long table.
# ignore_index=True gives the stacked rows fresh 0..n-1 labels instead of
# repeating the row labels of the two input frames.
results_df = pd.concat(
    [algorithm_results_df, deeplifting_results_df], axis=0, ignore_index=True
)

# Short algorithm labels to save space
algorithm_map = {
    'Differential Evolution': 'DE',
    'Dual Annealing': 'DA',
    'Deeplifting': 'DL',
    'PyGRANSO': 'PG',
    'IPOPT': 'IPOPT',
    'SCIP': 'SCIP',
}

# Apply the short labels. An algorithm that has no entry in algorithm_map
# keeps its original name; mapping with the dict directly would silently
# replace it with NaN.
results_df['algorithm'] = results_df['algorithm'].map(
    lambda name: algorithm_map.get(name, name)
)
results_df.head()

In [None]:
# Wide table for the heat map: one row per problem, one column per algorithm.
# Problem/algorithm pairs with no recorded result are filled with 0.0.
column_order = ['DL', 'DA', 'DE', 'IPOPT', 'PG', 'SCIP']
fn_results_df = (
    results_df.pivot_table(
        values='success rate', index='problem_name', columns='algorithm'
    )
    .fillna(0.0)
    .loc[:, column_order]
)

# Display only: problems ordered by the DL success rate
fn_results_df.sort_values(by='DL')

In [None]:
# Mean of each algorithm column taken over all problems
fn_results_df.mean(axis=0)

In [None]:
# Plot a random half of the problems; the seed is set so the draw is repeatable
np.random.seed(0)
sampled_results = (
    fn_results_df.sample(frac=0.50)
    .sort_index()
    # rename() leaves a problem name untouched when it has no entry in the
    # mapping; Index.map() would turn such a label into NaN on the axis
    .rename(index=problem_name_mapping)
)

# Generate the heatmap
fig, ax1 = plt.subplots(1, 1, figsize=(9, 6))

sns.heatmap(
    data=sampled_results,
    cmap='jet',
    ax=ax1,
    # annot=True,
    # fmt=".1f"
)
# Algorithm names go on top; both axis titles are blanked out
ax1.xaxis.tick_top()
ax1.tick_params(axis='both', which='major', labelsize=14)
ax1.set(xlabel="", ylabel="")

fig.tight_layout()
# fig.savefig('./paper-images/2d-heatmap-results.png', bbox_inches='tight')

# Rectification

In [None]:
# Point the deeplifting path variables at the search-results directory
deeplifting_file_directory = './low-dimension-search-results/*/*'
deeplifting_files = glob.glob(pathname=deeplifting_file_directory)
print(len(deeplifting_files))  # how many paths the pattern matched

# Best configuration per problem, built from the files matched above
updated_deeplifting_df = build_dl_results(files=deeplifting_files)

In [None]:
updated_deeplifting_df