In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from glob import glob

In [2]:
def select_from_data(df, algorithm=None, k=None, W=None, V=None, scope=None):
    """Return the rows of ``df`` that match every filter that was supplied.

    Each keyword names a column of ``df``. A value of ``None`` (the default)
    means "do not filter on this column". The supplied filters are combined
    with a logical AND, using element-wise equality against the column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to filter. Only the columns that are actually filtered on need
        to exist.
    algorithm, k, W, V, scope : optional
        Value that the column of the same name must equal.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels. When no filter is
        given, a copy of all rows is returned.

    Raises
    ------
    KeyError
        If a filter is given for a column that ``df`` does not have.
    """
    filters = {
        'algorithm': algorithm,
        'k': k,
        'W': W,
        'V': V,
        'scope': scope,
    }

    # Build the boolean mask directly instead of eval()-ing a generated source
    # string: this works for any comparable value (not only those whose repr()
    # is a valid literal) and cannot execute text smuggled in through a value.
    mask = None
    for column, wanted in filters.items():
        if wanted is None:
            continue
        matches = df[column] == wanted
        mask = matches if mask is None else mask & matches

    if mask is None:
        # No filter supplied: keep every row (the eval-based version raised a
        # SyntaxError on the resulting empty query).
        return df.copy()
    return df[mask]

In [90]:
# Read pickle files
# CLASS selects which pair of result files is loaded: both file names embed it as "class-{CLASS}".
# NOTE: pd.read_pickle unpickles the file, so only load result files from a trusted source.
CLASS = 6
# Main results frame; the cells below filter it on algorithm / W / V / scope and read 'cost', 'k' and 'real_time'.
data = pd.read_pickle(f'./../results/grid-search-2/class-{CLASS}-df.pickle')
# Companion "relative" frame; the cells below read its 'exhaustive_lcg_phi_frac' column per 'k'.
relative_data = pd.read_pickle(f'./../results/grid-search-2/class-{CLASS}-relative-df.pickle')

# K-swap

Did the benchmark replicate the results of Yernaux and Vanhoof (2019)?

They ran experiments for classes 1-6, with W and V set to 1 and k set to 0, 2, 4, and inf.

They compare k-swap against an exhaustive search in terms of the size of the generalisation (phi).

In [91]:
# Select rows with kswap (injective) and exhaustive (injective), with omega_scope = global.
# NOTE(review): the k-swap and relative rows are restricted to W=V=4 here, whereas the text
# above describes the reference experiments as W=V=1 -- confirm which setting is intended.
# NOTE(review): the exhaustive rows are not restricted on W/V -- confirm this is deliberate.
kswap_df = select_from_data(data, algorithm='kswap_lcg', W=4, V=4, scope='global')
exhaustive_df = select_from_data(data, algorithm='exhaustive_lcg', scope='global')
relative_df = select_from_data(relative_data, W=4, V=4, scope='global')

In [92]:
# Number of exhaustive rows with cost > -1 (the rows kept by the filtering step further down).
exhaustive_df[exhaustive_df['cost'].gt(-1)].shape[0]

1000

In [93]:
# Same count for the k-swap rows, broken down by the value of k.
kswap_df[kswap_df['cost'].gt(-1)].groupby('k').size()

k
0      1000
2      1000
4      1000
NIL     981
dtype: int64

In [94]:
# Keep only the rows whose cost is greater than -1, for both algorithms.
kswap_df = kswap_df[kswap_df['cost'].gt(-1)]
exhaustive_df = exhaustive_df[exhaustive_df['cost'].gt(-1)]

# Stack the two filtered selections (k-swap rows first) into a single frame.
df = pd.concat([kswap_df, exhaustive_df], axis=0)

In [95]:
# Per-k mean, then per-k standard deviation, of 'exhaustive_lcg_phi_frac'.
# NOTE(review): presumably the k-swap generalisation size as a percentage of the
# exhaustive one -- confirm against the code that builds the relative frame.
print(
    relative_df.groupby('k')['exhaustive_lcg_phi_frac'].mean(),
    relative_df.groupby('k')['exhaustive_lcg_phi_frac'].std(),
    sep='\n',
)

k
0      71.803214
2      89.675105
4      96.153056
NIL    99.990733
Name: exhaustive_lcg_phi_frac, dtype: float64
k
0      12.316689
2       8.459522
4       6.055572
NIL     0.290250
Name: exhaustive_lcg_phi_frac, dtype: float64


In [96]:
# Mean 'real_time' over the filtered exhaustive rows (time unit is not visible here -- TODO confirm).
exhaustive_df['real_time'].mean()

7447.380955

In [97]:
# Mean 'real_time' of the filtered k-swap rows for each value of k.
kswap_df.groupby('k')['real_time'].mean()

k
0          4.280015
2          7.019014
4         57.479571
NIL    25499.848406
Name: real_time, dtype: float64

# K-swap with variable decoupling

In our current implementation, the value of `k` should not have an influence on the outcome of the algorithm. Is this the case?

In [None]:
# Select the kswap_decoupling and exhaustive_decoupling rows with scope = global.
# No W/V filter is applied: every (W, V) combination is kept, and the cells below
# group and facet on W and V.
# Note: this rebinds kswap_df and exhaustive_df, replacing the kswap_lcg /
# exhaustive_lcg selections made in the previous section.
kswap_df = select_from_data(data, algorithm='kswap_decoupling', scope='global')
exhaustive_df = select_from_data(data, algorithm='exhaustive_decoupling', scope='global')

In [None]:
# Drop the rows of failed solutions, i.e. keep only rows whose cost is greater than -1.
kswap_df = kswap_df[kswap_df['cost'].gt(-1)]
exhaustive_df = exhaustive_df[exhaustive_df['cost'].gt(-1)]

In [None]:
# Lift pandas' row / column / width display limits so the whole table is rendered.
# These are global options: they stay in effect for every later cell in this session.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Summary statistics of 'cost' for every (W, V, k) combination of the k-swap rows.
df = kswap_df.groupby(['W', 'V', 'k'])['cost'].describe()
display(df)

In [None]:
# One facet per (V, W) pair -- V on the rows, W on the columns. Each facet is a box plot
# built from the 'cost' and 'k' columns (passed to sns.boxplot in that order), with the
# k categories in a fixed order.
sns.set_style('whitegrid')
grid = sns.FacetGrid(data=kswap_df, row='V', col='W')
grid.map(sns.boxplot, 'cost', 'k', order=[0, 2, 4, 'NIL'])

In [None]:
# One facet per (V, W) pair -- V on the rows, W on the columns. Each facet is a box plot
# built from the 'real_time' and 'k' columns (passed to sns.boxplot in that order), with
# the k categories in a fixed order.
sns.set_style('whitegrid')
grid = sns.FacetGrid(data=kswap_df, row='V', col='W')
grid.map(sns.boxplot, 'real_time', 'k', order=[0, 2, 4, 'NIL'])