In [None]:
% matplotlib inline

In [None]:
import pandas as pd
import seaborn as sns

In [None]:
# Global seaborn look: tick-style axes with the larger "talk" scaling.
seaborn_theme = {'style': 'ticks', 'context': 'talk'}
sns.set(**seaborn_theme)

In [None]:
from matplotlib import rc

# Font configuration for all figures.
# NOTE(review): the family is 'serif' but the font list is supplied under the
# 'sans-serif' key -- confirm this is intended rather than 'serif': ['Times'].
font_settings = {
    'family': 'serif',
    'sans-serif': ['Times'],
}
rc('font', **font_settings)
# Typeset all figure text with LaTeX; special characters in labels
# (e.g. '_' and '%') must therefore be escaped.
rc('text', usetex=True)

# To Run
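Before running, replace `hackjob_results_<timestamp>.csv` in the next cell with the name of the CSV file produced by running `scripts/hackjobs.py`. The file is looked up inside `../results/hackjob`, and several file names may be listed to concatenate multiple runs.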

In [None]:
import os.path as osp
import re

# Directory (relative to this notebook) holding the hackjob result CSVs.
dirname = osp.join('..', 'results', 'hackjob')
# Result files to load; all of them are stacked into a single frame.
filenames = [
    'hackjob_results_<timestamp>.csv',
]
# Drop the " Epoch" suffix from the accuracy columns.
accuracy_column_renames = {
    'Test Accuracy Epoch': 'Test Accuracy',
    'Test Accuracy Top5 Epoch': 'Test Accuracy Top5',
}
result_frames = [
    pd.read_csv(osp.join(dirname, filename)).rename(
        columns=accuracy_column_renames)
    for filename in filenames
]
df_hackjob = pd.concat(result_frames, ignore_index=True)


def model_sort(value):
    """Build a natural-sort key from a name containing a number.

    The name is split into its leading run of non-digit characters and the
    first run of digits that follows, e.g. ``'resnet18'`` becomes
    ``('resnet', 18)``. Anything after that first number is ignored.

    Args:
        value: The string to build a key for.

    Returns:
        A ``(prefix, number)`` tuple. When ``value`` contains no digits the
        result is ``(value, 0)``.
    """
    parsed = re.search(r'^([^\d]*)(\d+).*$', value)
    if parsed is not None:
        prefix, digits = parsed.groups()
        return (prefix, int(digits))
    # No number found: fall back to the whole string with a zero index.
    return (value, 0)


# Order rows by dataset, then model, using the natural-sort key from
# model_sort (applied element-wise to each of the two sort columns).
df_hackjob = df_hackjob.sort_values(
    by=['Dataset', 'Model'],
    key=lambda col: col.apply(model_sort),
)
df_hackjob

In [None]:
# Print one LaTeX table row per model: "<model> & <stage,stage,...> \\".
# Underscores are escaped so the rows can be pasted into a LaTeX document.
# NOTE(review): groupby sorts the group keys by default, so rows come out in
# plain lexicographic model order rather than the model_sort order -- confirm.
df_sub = df_hackjob[['Model', 'Stage Name']]
for model, df_sub_model in df_sub.groupby('Model'):
    model_tex = model.replace('_', r'\_')
    stages_tex = ','.join(df_sub_model['Stage Name'].values).replace('_', r'\_')
    print('        ', model_tex, '&', stages_tex, r'\\')

In [None]:
# Parent of the current working directory; used as the fallback import root.
# assumes the notebook is run from a direct subdirectory of the repository
# root -- TODO confirm
root_dir = osp.dirname(osp.realpath(osp.curdir))
try:
    import pixpnet
except ImportError:
    # pixpnet is not importable as-is: make the parent directory importable
    # and retry. A second failure propagates to the caller.
    import sys

    sys.path.append(root_dir)

    import pixpnet

In [None]:
import numpy as np
from pixpnet.symbolic import models as sym_models


def compute_rf_stats(model_name, height, width, stage_names):
    """Compute one mean receptive-field statistic per requested stage.

    Calls ``sym_models.compute_rf_data`` for the model and input size, then,
    for each stage, averages ``len(element)`` over every element of that
    stage's flattened intermediate output.

    Args:
        model_name: Model identifier forwarded to ``compute_rf_data``.
        height: Input height forwarded to ``compute_rf_data``.
        width: Input width forwarded to ``compute_rf_data``.
        stage_names: Iterable of stage names; each must be a key of the
            intermediates returned by ``compute_rf_data``.

    Returns:
        A list with one mean per stage, in the order of ``stage_names``
        (the caller uses these as patch sizes).

    Raises:
        AssertionError: If any stage name is missing from the intermediates.
    """
    _, intermediates = sym_models.compute_rf_data(
        model_name, height, width, num_classes=1)
    missing = [name for name in stage_names if name not in intermediates]
    assert not missing, (
        stage_names, intermediates.keys())

    def _mean_symbol_count(intermediate):
        # When flagged as having identical channels, a single channel
        # (index 0 along the second axis) is enough.
        if getattr(intermediate, 'identical_channels', False):
            intermediate = intermediate[:, 0]
        # presumably each element is the collection of input symbols feeding
        # that output position -- verify against pixpnet.symbolic
        return np.mean([len(element) for element in intermediate.flatten()])

    return [_mean_symbol_count(intermediates[name]) for name in stage_names]

In [None]:
def assert_same(df, col):
    """Return the single distinct value found in column ``col`` of ``df``.

    Args:
        df: DataFrame to inspect.
        col: Label of the column that is expected to be constant.

    Returns:
        The one unique value of the column.

    Raises:
        AssertionError: If the column does not hold exactly one distinct
            value; the message lists the values that were found.
    """
    distinct = df.loc[:, col].unique()
    n_distinct = len(distinct)
    assert n_distinct == 1, f'{col} is not unique with {n_distinct} values: {distinct}'
    return distinct[0]

In [None]:
# fix for densenets: drop the rows for the 'norm0' and 'norm5' stages
excluded_stage_names = {'norm0', 'norm5'}
df_hackjob = df_hackjob[~df_hackjob['Stage Name'].isin(excluded_stage_names)]

# One statistic per remaining row, collected group by group in the order the
# (Dataset, Model) groups first appear in the frame.
patch_sizes = []
grouped_by_model = df_hackjob.groupby(['Dataset', 'Model'], sort=False)
for (ds_name, model_name), df_ds_model in grouped_by_model:
    print(model_name)
    # Every row of a group must share one input resolution.
    height = assert_same(df_ds_model, 'Height')
    width = assert_same(df_ds_model, 'Width')
    stage_names = df_ds_model['Stage Name']
    patch_sizes += compute_rf_stats(model_name, height, width, stage_names)

In [None]:
# Work on a copy so the derived columns do not leak into df_hackjob.
df = df_hackjob.copy()
# patch_sizes is a plain list, so it is assigned positionally; this relies on
# it having been built in the same row order as df_hackjob.
df['Patch Size'] = patch_sizes
df['Size'] = df['Height'] * df['Width']
df['Image Size'] = df['Height'].astype(str) + ' × ' + df['Width'].astype(str)
df['Patch % of Image'] = df['Patch Size'] / df['Size'] * 100

# Min-max normalise the stage number within each (Dataset, Model) group.
# transform() returns results carrying df's own index, so every value lands on
# the row it was computed from. Concatenating per-group pieces with a reset
# index would instead be re-aligned by label on assignment and end up on the
# wrong rows, because df keeps the sorted/filtered labels of df_hackjob.
stage_by_model = df.groupby(['Dataset', 'Model'], sort=False)['Stage #']
stage_min = stage_by_model.transform('min')
stage_max = stage_by_model.transform('max')
# A group with a single distinct stage number yields 0 / 0 = NaN.
df['Stage # Norm'] = (df['Stage #'] - stage_min) / (stage_max - stage_min)
df

In [None]:
# Scatter of accuracy against relative patch size for every row, one facet
# row per dataset, with each model given its own colour and marker.
all_points_kwargs = dict(
    x='Patch % of Image',
    y='Test Accuracy',
    hue='Model',
    style='Model',
    markers=True,
    kind='scatter',
    row='Dataset',
    height=8,
    s=200,
)
g = sns.relplot(data=df, **all_points_kwargs)
# Log-scale the x axis.
g.set(xscale='log')

In [None]:
from pixpnet.utils import is_pareto_efficient

# Assign a Pareto rank to every row by repeatedly peeling off a front:
# the rows selected on the first pass get rank 1, those selected among the
# remainder get rank 2, and so on. -1 marks rows that were never ranked.
df['Pareto Rank'] = -1
df_running = df
rank = 1
max_rank = np.inf  # no cap: keep going until every row has been ranked
while len(df_running) > 0 and rank < max_rank:
    objectives = df_running[['Test Accuracy', 'Patch % of Image']].values
    # presumably a boolean mask of the non-dominated rows, maximising accuracy
    # and minimising patch percentage -- verify against pixpnet.utils
    nd_mask = is_pareto_efficient(objectives, maximize=[True, False])
    df.loc[df_running.index[nd_mask], 'Pareto Rank'] = rank
    # Continue with the rows that were not selected in this pass.
    df_running = df_running[~nd_mask]
    rank += 1
df

In [None]:
from adjustText import adjust_text

# Copy of df whose model names are safe to typeset with LaTeX:
# the "_bn" suffix is dropped from "vgg<N>_bn" and the remaining underscores
# are escaped.
df_tex = df.copy()
df_tex['Model'] = (
    df_tex['Model']
    .apply(lambda s: re.sub(r'vgg(\d+)_bn', r'vgg\1', s))
    .str.replace('_', '\\_')
)

# Scatter of only the rows with Pareto rank below 2, one colour and marker
# per model.
pareto_front_rows = df_tex[df_tex['Pareto Rank'] < 2]
pareto_scatter_kwargs = dict(
    x='Patch % of Image',
    y='Test Accuracy',
    hue='Model',
    style='Model',
    markers=True,
    kind='scatter',
    height=6,
    s=250,
)
g = sns.relplot(data=pareto_front_rows, **pareto_scatter_kwargs)


def maybe_int(v):
    """Convert ``v`` with ``int()`` when possible, otherwise return it as is.

    Only a ``ValueError`` from ``int(v)`` counts as "not convertible"; any
    other exception (for example ``TypeError`` for ``None``) propagates.

    Args:
        v: Value to convert.

    Returns:
        ``int(v)``, or ``v`` unchanged if the conversion raised ``ValueError``.
    """
    try:
        converted = int(v)
    except ValueError:
        converted = v
    return converted


# The rank-1 (non-dominated) rows do not depend on the axis being drawn, so
# select and order them once instead of on every loop iteration.
# NOTE: _x_var / _y_var are private FacetGrid attributes holding the names of
# the plotted x and y columns.
x_var, y_var = g._x_var, g._y_var
df_nd = df_tex[df_tex['Pareto Rank'] == 1]
df_nd_sort = df_nd.sort_values(by=x_var)

for ax in g.axes.flat:
    # Dashed staircase through the rank-1 points, ordered by x.
    ax.step(df_nd_sort[x_var], df_nd_sort[y_var], c='gray', linestyle='--',
            linewidth=1.5, where='post')
    texts = []
    for i in df_nd.index:
        # Figure text is typeset by LaTeX (text.usetex is enabled in the rc
        # setup of this notebook), where a bare underscore is an error, so
        # escape it the same way the Model names are escaped.
        stage_label = df_nd.loc[i, 'Stage Name'].replace('_', r'\_')
        txt = ax.text(
            df_nd.loc[i, x_var] + 2.5,  # place the label to the right of its marker
            df_nd.loc[i, y_var],
            stage_label,
            horizontalalignment='left',
            size='small', color='black'
        )
        texts.append(txt)
    # Resolve overlapping labels by moving them vertically only.
    adjust_text(texts, only_move={'points': 'y', 'texts': 'y'},
                arrowprops=dict(arrowstyle="-", color='gray', lw=1))

    ax.set_xlabel('Mean Receptive Field Size (\\%)')

# Uncomment to save figure
# !mkdir -p ../figures
# g.savefig('../figures/imagenette_rf_vs_accuracy_pareto_front.pdf')