In [None]:
import pickle
import numpy as np

# Load the "hard" tuning artifacts: one collection of score samples per
# evaluated configuration, plus the matching hyperparameter records.
# NOTE(review): pickle.load can execute arbitrary code from the file; these are
# assumed to be trusted, locally produced artifacts - confirm.
with open('../outputs/synthesizers/scores_hard.pkl', 'rb') as f:
    existing_data = pickle.load(f)
with open('../outputs/synthesizers/hyperparams_hard.pkl', 'rb') as f:
    existing_params = pickle.load(f)

# Mean score of each configuration.
scores = list(map(np.mean, existing_data))
# Error-bar half-width per configuration: 1.96 * std / sqrt(10).
# NOTE(review): presumably a 95% normal-approximation interval over 10 repeats;
# the 10 is hard-coded rather than taken from the sample length - TODO confirm.
stds = [np.std(sample) * 1.96 / np.sqrt(10) for sample in existing_data]
# Synthesizer name of each configuration (used later to pick the plot colour).
cat = [record['mod_name'] for record in existing_params]

In [None]:
# SMOTE: load the two pickled SMOTE result files ("base" and "hard") and print
# the mean of each, one per line.
# NOTE(review): pickle.load is only safe on trusted files - these are assumed
# to be local artifacts.
with open('../outputs/synthesizers/smote_base.pkl', 'rb') as f:
    smote = pickle.load(f)
with open('../outputs/synthesizers/smote_hard.pkl', 'rb') as f:
    smote_hard = pickle.load(f)

print(np.mean(smote), np.mean(smote_hard), sep='\n')

In [None]:
%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# Figure: mean score with error bars for every configuration of the "hard"
# tuning run, coloured by synthesizer family, saved to tuning.pdf.

# Global matplotlib styling: serif font, outward gray ticks, 12 pt text.
plt.rc('font', family='serif')
plt.rc('ytick', direction='out', color='gray')
plt.rc('xtick', direction='out', color='gray')
plt.rcParams.update({'font.size': 12})

# One colour per synthesizer family; the keys must cover every value in `cat`.
category_colors = {'TVAE': 'yellowgreen', 'CTGAN': 'sandybrown'}

# Grid on by default for axes created from here on, drawn in translucent gray.
plt.rc('axes', grid=True)
plt.rcParams['grid.color'] = (0.5, 0.5, 0.5, 0.2)

fig, ax = plt.subplots(1, 1, figsize=(8, 5))
seen_categories = set()
# The loop target must not be named `cat`: that would rebind the `cat` list to
# its last element (a string), so re-running this cell would iterate over
# characters and fail the colour lookup.
for x, (y, yerr, model_name) in enumerate(zip(scores, stds, cat)):
    color = category_colors[model_name]
    ax.errorbar(x, y, yerr=yerr, fmt='o', color=color, ecolor=color, capsize=3, markersize=3, alpha=1.)
    ax.plot(x, y, 'o', color=color, markersize=3, alpha=1)
    if model_name not in seen_categories:
        # Empty, labelled artist so each family gets exactly one legend entry.
        ax.errorbar([], [], [], fmt='o', color=color, ecolor=color, capsize=3, markersize=3, alpha=1., label=model_name)
        seen_categories.add(model_name)
# Dashed reference line at zero.
ax.axhline(0, ls='--', lw=1, color='lightgray')
ax.set_xlim((-1, len(existing_data)))
ax.set_ylim((-0.00015, 0.00045))
ax.set_xlabel('ID in Bayesian optimisation')
ax.set_ylabel('Variation in Gini after augmentation')
# Raw string: '\%' is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The resulting text is unchanged.
ax.set_title(r'Augment hardest $10\%$ by $100\%$')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
ax.legend()
# NOTE(review): called without an explicit on/off argument while the grid is
# already enabled via rc above - presumably this switches the vertical
# gridlines off; confirm against the rendered figure.
ax.grid(axis='x')
fig.savefig('../outputs/figures/tuning.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Load the "tot" tuning artifacts: one collection of score samples per
# evaluated configuration, plus the matching hyperparameter records.
# NOTE(review): pickle.load can execute arbitrary code from the file; these are
# assumed to be trusted, locally produced artifacts - confirm.
with open('../outputs/synthesizers/scores_tot.pkl', 'rb') as f:
    existing_data_tot = pickle.load(f)
with open('../outputs/synthesizers/hyperparams_tot.pkl', 'rb') as f:
    existing_params_tot = pickle.load(f)

# These three names are rebound here and now describe the "tot" run.
# Mean score of each configuration.
scores = list(map(np.mean, existing_data_tot))
# Error-bar half-width per configuration: 1.96 * std / sqrt(10).
# NOTE(review): presumably a 95% normal-approximation interval over 10 repeats;
# the 10 is hard-coded rather than taken from the sample length - TODO confirm.
stds = [np.std(sample) * 1.96 / np.sqrt(10) for sample in existing_data_tot]
# Synthesizer name of each configuration (used below to pick the plot colour).
cat = [record['mod_name'] for record in existing_params_tot]

%config InlineBackend.figure_format = 'svg'
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

# Figure: mean score with error bars for every configuration of the "tot"
# tuning run, coloured by synthesizer family, saved to tuning_tot.pdf.

# Global matplotlib styling: serif font, outward gray ticks, 12 pt text.
plt.rc('font', family='serif')
plt.rc('ytick', direction='out', color='gray')
plt.rc('xtick', direction='out', color='gray')
plt.rcParams.update({'font.size': 12})

# One colour per synthesizer family; the keys must cover every value in `cat`.
category_colors = {'TVAE': 'yellowgreen', 'CTGAN': 'sandybrown'}

fig, ax = plt.subplots(1, 1, figsize=(8, 5))
seen_categories = set()
# The loop target must not be named `cat`: that would rebind the `cat` list to
# its last element (a string), so re-running this cell's plotting code would
# iterate over characters and fail the colour lookup.
for x, (y, yerr, model_name) in enumerate(zip(scores, stds, cat)):
    color = category_colors[model_name]
    ax.errorbar(x, y, yerr=yerr, fmt='o', color=color, ecolor=color, capsize=3, markersize=3, alpha=1.)
    ax.plot(x, y, 'o', color=color, markersize=3, alpha=1)
    if model_name not in seen_categories:
        # Empty, labelled artist: one labelled handle per family.
        # NOTE(review): ax.legend() is never called in this cell, so these
        # labels are not displayed, unlike the 'tuning.pdf' figure - confirm
        # whether omitting the legend here is intentional.
        ax.errorbar([], [], [], fmt='o', color=color, ecolor=color, capsize=3, markersize=3, alpha=1., label=model_name)
        seen_categories.add(model_name)
# Dashed reference line at zero.
ax.axhline(0, ls='--', lw=1, color='lightgray')
ax.set_xlim((-1, len(existing_data_tot)))
ax.set_ylim((-0.00015, 0.00045))
ax.set_xlabel('ID in Bayesian optimisation')
ax.set_ylabel('Variation in Gini after augmentation')
# Raw string: '\%' is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning. The resulting text is unchanged.
ax.set_title(r'Augment by $10\%$')
ax.xaxis.set_major_locator(MaxNLocator(integer=True))
# NOTE(review): called without an explicit on/off argument; if the axes grid is
# still enabled via rcParams from an earlier cell this presumably switches the
# vertical gridlines off - confirm against the rendered figure.
ax.grid(axis='x')
fig.savefig('../outputs/figures/tuning_tot.pdf', dpi=300, bbox_inches='tight')

In [None]:
# Best TVAE hard: display the hyperparameter record at index 0 of the "hard"
# tuning run. The index is hard-coded - presumably read off the tuning plot;
# TODO confirm it still matches if the pickled results are regenerated.
existing_params[0]

In [None]:
# Best CTGAN hard: display the hyperparameter record at index 3 of the "hard"
# tuning run. The index is hard-coded - presumably read off the tuning plot;
# TODO confirm it still matches if the pickled results are regenerated.
existing_params[3]

In [None]:
# Best TVAE tot: display the hyperparameter record at index 3 of the "tot"
# tuning run. The index is hard-coded - presumably read off the tuning plot;
# TODO confirm it still matches if the pickled results are regenerated.
existing_params_tot[3]

In [None]:
# Best CTGAN tot: display the hyperparameter record at index 1 of the "tot"
# tuning run. The index is hard-coded - presumably read off the tuning plot;
# TODO confirm it still matches if the pickled results are regenerated.
existing_params_tot[1]