In [1]:
import pathlib
import pickle
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns

from config import get_PCA

import sys
sys.path.append('./PCA')
from utils import get_methods

# Registry of PCA methods. The loaders below index it by method name and
# iterate over the value as 2-tuples, using the second element as the prefix
# of each result filename.
methods = get_methods()

In [2]:
# File extension appended to the name of every figure saved by this notebook.
ext = 'png'

In [3]:
def plot_tradeoff_std(ax, load_fn, k, seeds, margin=-1):
    """Plot each method's mean trade-off curve with standard-deviation error bars.

    For every method, ``load_fn(method, k, seeds)`` must return a triple
    ``(obj, inc, _)`` of arrays with at least three axes. Axes 0 and 2 are
    averaged out, so axis 1 indexes the points of a curve; ``obj`` supplies
    the x coordinates and ``inc`` the y coordinates. Error bars are the
    standard deviation over axis 0 of the axis-2 means. With the
    ``load_*_all`` helpers of this notebook, axis 0 is the seed and axis 1 is
    the method's configuration.

    Parameters
    ----------
    ax : matplotlib Axes
        Axes to draw on. Its tick locations, tick labels and position are
        overwritten.
    load_fn : callable
        Called as ``load_fn(method, k, seeds)`` for each plotted method.
    k, seeds
        Forwarded to ``load_fn`` unchanged.
    margin : float, default -1
        When positive, both axes are zoomed to the range spanned by the
        'ConstPCA' and 'BatchPCA' points, padded on each side by ``margin``
        times that range. Otherwise the autoscaled limits are kept.
    """
    keys = ['ConstPCA', 'LossGD', 'LossMEG', 'RobustPCA', 'BatchPCA']
    labels = ['ConsistentPCA', 'GD', 'MEG', 'Spherical PCA', 'Vanilla PCA']
    palette = sns.color_palette("Set1", len(keys))[::-1]
    markers = ['o', '^', 'v', 'P', 'X']
    msizes = [10, 10, 10, 18, 18]
    # An empty linestyle draws the last two methods as markers only.
    linestyles = ['-', '-', '-', '', '']

    # Running bounds of the reference methods. The maxima start at -inf (not
    # 0) so that data lying entirely below zero is still tracked correctly.
    xmin, xmax = np.inf, -np.inf
    ymin, ymax = np.inf, -np.inf
    for j, method in enumerate(keys):
        obj, inc, _ = load_fn(method, k, seeds)
        y = inc.mean(axis=(0, 2))
        ys = inc.mean(axis=2).std(axis=0)
        x = obj.mean(axis=(0, 2))
        xs = obj.mean(axis=2).std(axis=0)
        ax.plot(x, y, marker=markers[j], linestyle=linestyles[j], color=palette[j], markeredgecolor='w',
                markersize=msizes[j], label=labels[j])
        ax.errorbar(x, y, yerr=ys, xerr=xs, fmt='none', color=palette[j], alpha=0.3, elinewidth=1, capsize=0)
        # Only these two methods define the window used when zooming.
        if method in ['ConstPCA', 'BatchPCA']:
            xmin, xmax = min(xmin, x.min()), max(xmax, x.max())
            ymin, ymax = min(ymin, y.min()), max(ymax, y.max())
    if margin > 0:
        xpad = margin * (xmax - xmin)
        ypad = margin * (ymax - ymin)
        ax.set_xlim([xmin - xpad, xmax + xpad])
        ax.set_ylim([ymin - ypad, ymax + ypad])

    # Keep every other automatically chosen tick and format the labels by
    # hand. Note: matplotlib may widen the view limits so that every tick
    # passed to set_yticks/set_xticks is visible.
    yticks = ax.get_yticks().tolist()[::2]
    ax.set_yticks(yticks)
    ax.set_yticklabels(['{:,.2f}'.format(yt) for yt in yticks], fontsize=15)

    xticks = ax.get_xticks().tolist()[::2]
    ax.set_xticks(xticks)
    ax.set_xticklabels(['{:,.0f}'.format(xt) for xt in xticks], fontsize=15)

    # Fixed axes rectangle, in figure-fraction coordinates:
    # [left, bottom, width, height].
    axes_left = 0.15
    axes_bottom = 0.15
    axes_width = 0.70
    axes_height = 0.70
    ax.set_position([axes_left, axes_bottom, axes_width, axes_height])

## Synthetic

In [4]:
def load_synthetic(dim, num, method_name, k=2, seed=0):
    """Load the saved synthetic-data results of one method for a single seed.

    One ``.npz`` file is read per entry of ``methods[method_name]`` from
    ``get_PCA('result')/synthetic/dim<dim>_num<num>/<method_name>/k<k>/``,
    named ``<prefix>_<seed>.npz`` where ``<prefix>`` is the second element of
    the entry.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``
        The ``'obj'``, ``'inc'`` and ``'rec'`` arrays of every file, stacked
        along a new leading axis in the order of ``methods[method_name]``.
    """
    result = get_PCA('result').joinpath('synthetic').joinpath('dim%03d_num%04d' % (dim, num))
    result = result.joinpath(method_name).joinpath('k%02d' % (k,))
    obj, inc, rec = [], [], []
    for _, f in methods[method_name]:
        fn = result.joinpath('%s_%03d.npz' % (f, seed))
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it as soon as the arrays have been read into memory.
        with np.load(fn) as res:
            obj.append(res['obj'])
            inc.append(res['inc'])
            rec.append(res['rec'])
    return np.array(obj), np.array(inc), np.array(rec)

def load_synthetic_all(dim, num, method_name, k=2, seeds=[0]):
    """Load the synthetic-data results of one method for several seeds.

    Calls ``load_synthetic`` once per seed, in order, and stacks the per-seed
    ``obj``, ``inc`` and ``rec`` results along a new leading axis.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``, each with the seed as
    its first axis.
    """
    # The default ``seeds`` list is only iterated, never mutated.
    runs = [load_synthetic(dim, num, method_name, k=k, seed=s) for s in seeds]
    obj, inc, rec = ([run[i] for run in runs] for i in range(3))
    return np.array(obj), np.array(inc), np.array(rec)

In [5]:
# legends
# Build a standalone, single-row legend image shared by all trade-off figures.
figure = get_PCA('figure')
figure.mkdir(parents=True, exist_ok=True)

k = 2
seeds = range(100)
load_fn = lambda method, k, seeds: load_synthetic_all(100, 100, method, k, seeds)

# Draw one throwaway plot only to obtain the legend handles and labels.
fig, ax = plt.subplots(figsize=(8, 6))
plot_tradeoff_std(ax, load_fn, k, seeds)
handles, labels = ax.get_legend_handles_labels()
plt.close(fig)

# Render the legend once on a temporary figure to measure its size.
temp_fig = plt.figure()
temp_ax = temp_fig.add_subplot(111)
temp_legend = temp_ax.legend(handles, labels, loc='center', frameon=False, fontsize=16, ncol=len(handles))
temp_fig.canvas.draw()
legend_bbox = temp_legend.get_window_extent()
# The extent is in pixels of the figure it was drawn on, so convert to inches
# with that figure's dpi rather than an assumed constant.
dpi = temp_fig.dpi
plt.close(temp_fig)

fig_legend = plt.figure(figsize=(legend_bbox.width / dpi, legend_bbox.height / dpi))
legend = fig_legend.legend(handles, labels, loc='center', frameon=False, fontsize=16, ncol=len(handles))
fn = figure.joinpath('legend_pca.%s' % ext)
fig_legend.savefig(fn, bbox_inches='tight', pad_inches=0)
plt.close(fig_legend)

In [6]:
# Save one trade-off figure per (k, zoom variant, num, dim) combination of the
# synthetic experiments.
ks = [2, 3, 5, 10]
seeds = range(100)

figure = get_PCA('figure').joinpath('synthetic')
figure.mkdir(parents=True, exist_ok=True)

# The plot styling is identical for every figure, so it is set once up front.
sns.set_theme(style="whitegrid")
sns.set_context("paper", font_scale=1.2)

for k in ks:
    # '' keeps the autoscaled limits (margin=-1); '_zoom' zooms with margin=0.2.
    for f, margin in zip(['', '_zoom'], [-1, 0.2]):
        subfigure = figure.joinpath('k%02d%s' % (k, f))
        subfigure.mkdir(parents=True, exist_ok=True)
        for num in [50, 100, 300, 500, 1000]:
            for dim in [20, 50, 100]:
                print(k, margin, num, dim)
                fig, axes = plt.subplots(1, 1, figsize=(4, 2), sharey=False)
                load_fn = lambda method, k, seeds: load_synthetic_all(dim, num, method, k, seeds)
                plot_tradeoff_std(axes, load_fn, k, seeds, margin=margin)
                fig.tight_layout()
                fn = subfigure.joinpath('avg_dim%03d_num%04d.%s' % (dim, num, ext))
                fig.savefig(fn)
                # Close this figure explicitly. Calling plt.clf() after a
                # close would create a new empty figure and leave it open.
                plt.close(fig)

2 -1 50 20
2 -1 50 50
2 -1 50 100
2 -1 100 20
2 -1 100 50
2 -1 100 100
2 -1 300 20
2 -1 300 50
2 -1 300 100
2 -1 500 20
2 -1 500 50
2 -1 500 100
2 -1 1000 20
2 -1 1000 50
2 -1 1000 100
2 0.2 50 20
2 0.2 50 50
2 0.2 50 100
2 0.2 100 20
2 0.2 100 50
2 0.2 100 100
2 0.2 300 20
2 0.2 300 50
2 0.2 300 100
2 0.2 500 20
2 0.2 500 50
2 0.2 500 100
2 0.2 1000 20
2 0.2 1000 50
2 0.2 1000 100
3 -1 50 20
3 -1 50 50
3 -1 50 100
3 -1 100 20
3 -1 100 50
3 -1 100 100
3 -1 300 20
3 -1 300 50
3 -1 300 100
3 -1 500 20
3 -1 500 50
3 -1 500 100
3 -1 1000 20
3 -1 1000 50
3 -1 1000 100
3 0.2 50 20
3 0.2 50 50
3 0.2 50 100
3 0.2 100 20
3 0.2 100 50
3 0.2 100 100
3 0.2 300 20
3 0.2 300 50
3 0.2 300 100
3 0.2 500 20
3 0.2 500 50
3 0.2 500 100
3 0.2 1000 20
3 0.2 1000 50
3 0.2 1000 100
5 -1 50 20
5 -1 50 50
5 -1 50 100
5 -1 100 20
5 -1 100 50
5 -1 100 100
5 -1 300 20
5 -1 300 50
5 -1 300 100
5 -1 500 20
5 -1 500 50
5 -1 500 100
5 -1 1000 20
5 -1 1000 50
5 -1 1000 100
5 0.2 50 20
5 0.2 50 50
5 0.2 50 100
5 0.2 10

<Figure size 640x480 with 0 Axes>

## Face

In [8]:
def load_face(method_name, k=2, seed=0):
    """Load the saved face-data results of one method for a single seed.

    One ``.npz`` file is read per entry of ``methods[method_name]`` from
    ``get_PCA('result')/face/<method_name>/k<k>/``, named
    ``<prefix>_<seed>.npz`` where ``<prefix>`` is the second element of the
    entry.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``
        The ``'obj'``, ``'inc'`` and ``'rec'`` arrays of every file, stacked
        along a new leading axis in the order of ``methods[method_name]``.
    """
    result = get_PCA('result').joinpath('face')
    result = result.joinpath(method_name).joinpath('k%02d' % (k,))
    obj, inc, rec = [], [], []
    for _, f in methods[method_name]:
        fn = result.joinpath('%s_%03d.npz' % (f, seed))
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it as soon as the arrays have been read into memory.
        with np.load(fn) as res:
            obj.append(res['obj'])
            inc.append(res['inc'])
            rec.append(res['rec'])
    return np.array(obj), np.array(inc), np.array(rec)

def load_face_all(method_name, k=2, seeds=[0]):
    """Load the face-data results of one method for several seeds.

    Calls ``load_face`` once per seed, in order, and stacks the per-seed
    ``obj``, ``inc`` and ``rec`` results along a new leading axis.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``, each with the seed as
    its first axis.
    """
    # The default ``seeds`` list is only iterated, never mutated.
    runs = [load_face(method_name, k=k, seed=s) for s in seeds]
    obj, inc, rec = ([run[i] for run in runs] for i in range(3))
    return np.array(obj), np.array(inc), np.array(rec)

In [10]:
# Save one trade-off figure per (k, zoom variant) for the face experiments.
ks = [2, 3, 5, 10]
seeds = range(30)
load_fn = load_face_all

figure = get_PCA('figure').joinpath('face')
figure.mkdir(parents=True, exist_ok=True)

# The plot styling is identical for every figure, so it is set once up front.
sns.set_theme(style="whitegrid")
sns.set_context("paper", font_scale=1.2)

for k in ks:
    # '' keeps the autoscaled limits (margin=-1); '_zoom' zooms with margin=0.2.
    for f, margin in zip(['', '_zoom'], [-1, 0.2]):
        print(k, margin)
        subfigure = figure.joinpath('k%02d%s' % (k, f))
        subfigure.mkdir(parents=True, exist_ok=True)
        fig, axes = plt.subplots(1, 1, figsize=(4, 2), sharey=False)
        plot_tradeoff_std(axes, load_fn, k, seeds, margin=margin)
        fig.tight_layout()
        fn = subfigure.joinpath('avg.%s' % ext)
        fig.savefig(fn)
        # Close this figure explicitly. Calling plt.clf() after a close would
        # create a new empty figure and leave it open.
        plt.close(fig)

2 -1
2 0.2
3 -1
3 0.2
5 -1
5 0.2
10 -1
10 0.2


<Figure size 640x480 with 0 Axes>

## OpenML

In [11]:
def load_openml(classwise, data_name, method_name, k=2, seed=0):
    """Load the saved OpenML results of one method for a single seed.

    One ``.npz`` file is read per entry of ``methods[method_name]`` from
    ``get_PCA('result')/openml/<split>/<data_name>/<method_name>/k<k>/``,
    where ``<split>`` is ``'classwise'`` when ``classwise`` is truthy and
    ``'minibatch'`` otherwise. Files are named ``<prefix>_<seed>.npz`` with
    ``<prefix>`` the second element of the entry.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``
        The ``'obj'``, ``'inc'`` and ``'rec'`` arrays of every file, stacked
        along a new leading axis in the order of ``methods[method_name]``.
    """
    result = get_PCA('result').joinpath('openml').joinpath('classwise' if classwise else 'minibatch').joinpath(data_name)
    result = result.joinpath(method_name).joinpath('k%02d' % (k,))
    obj, inc, rec = [], [], []
    for _, f in methods[method_name]:
        fn = result.joinpath('%s_%03d.npz' % (f, seed))
        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it as soon as the arrays have been read into memory.
        with np.load(fn) as res:
            obj.append(res['obj'])
            inc.append(res['inc'])
            rec.append(res['rec'])
    return np.array(obj), np.array(inc), np.array(rec)

def load_openml_all(classwise, data_name, method_name, k=2, seeds=[0]):
    """Load the OpenML results of one method for several seeds.

    Calls ``load_openml`` once per seed, in order, and stacks the per-seed
    ``obj``, ``inc`` and ``rec`` results along a new leading axis.

    Returns
    -------
    tuple of three numpy arrays ``(obj, inc, rec)``, each with the seed as
    its first axis.
    """
    # The default ``seeds`` list is only iterated, never mutated.
    runs = [load_openml(classwise, data_name, method_name, k=k, seed=s) for s in seeds]
    obj, inc, rec = ([run[i] for run in runs] for i in range(3))
    return np.array(obj), np.array(inc), np.array(rec)

In [13]:
# Save one trade-off figure per (k, zoom variant, dataset) for the OpenML
# experiments.
ks = [2, 3, 5, 10]
seeds = range(30)
classwise = True

figure = get_PCA('figure').joinpath('openml')
figure.mkdir(parents=True, exist_ok=True)

# The plot styling is identical for every figure, so it is set once up front.
sns.set_theme(style="whitegrid")
sns.set_context("paper", font_scale=1.2)

for k in ks:
    # '' keeps the autoscaled limits (margin=-1); '_zoom' zooms with margin=0.2.
    for f, margin in zip(['', '_zoom'], [-1, 0.2]):
        subfigure = figure.joinpath('k%02d%s' % (k, f))
        subfigure.mkdir(parents=True, exist_ok=True)
        for data_name in ['micro-mass', 'har', 'gas-drift', 'mnist_784']:
            print(k, margin, data_name)
            fig, axes = plt.subplots(1, 1, figsize=(4, 2), sharey=False)
            load_fn = lambda method, k, seeds: load_openml_all(classwise, data_name, method, k, seeds)
            plot_tradeoff_std(axes, load_fn, k, seeds, margin=margin)
            fig.tight_layout()
            fn = subfigure.joinpath('avg_%s.%s' % (data_name, ext))
            fig.savefig(fn)
            # Close this figure explicitly. Calling plt.clf() after a close
            # would create a new empty figure and leave it open.
            plt.close(fig)

2 -1 micro-mass
2 -1 har
2 -1 gas-drift
2 -1 mnist_784
2 0.2 micro-mass
2 0.2 har
2 0.2 gas-drift
2 0.2 mnist_784
3 -1 micro-mass
3 -1 har
3 -1 gas-drift
3 -1 mnist_784
3 0.2 micro-mass
3 0.2 har
3 0.2 gas-drift
3 0.2 mnist_784
5 -1 micro-mass
5 -1 har
5 -1 gas-drift
5 -1 mnist_784
5 0.2 micro-mass
5 0.2 har
5 0.2 gas-drift
5 0.2 mnist_784
10 -1 micro-mass
10 -1 har
10 -1 gas-drift
10 -1 mnist_784
10 0.2 micro-mass
10 0.2 har
10 0.2 gas-drift
10 0.2 mnist_784


<Figure size 640x480 with 0 Axes>