In [1]:
import sys

# Add the parent directory to the import search path so the `cs168_project`
# imports in the next cell can be resolved (presumably the package lives one
# level above this notebook — TODO confirm).
sys.path.append("../")


In [2]:
import os
import json
import shutil

import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

from cs168_project.experiments import (
    DatasetType, AlgorithmType, ExperimentConfig, ExperimentManager, ExperimentStatsRunner)
from cs168_project.datasets import DATASET_TO_LABEL
from cs168_project.algorithm import ALGORITHM_TO_LABEL
from cs168_project.evaluation import DEFAULT_OUTPUT_DIR

# Disable matplotlib's interactive mode; the figures built later in this
# notebook are written to disk with `fig.savefig` rather than shown inline.
plt.ioff()


  @numba.jit()
  @numba.jit()
  @numba.jit()
  @numba.jit()


<contextlib.ExitStack at 0x1be51ee7910>

In [3]:
# Path of the JSON file that holds the list of experiment configurations to analyze.
configs_fname = "experiment_configs_test1.json"


In [4]:
# Read the serialized experiment configurations and turn each JSON entry
# into an ExperimentConfig object.
with open(configs_fname, 'r') as configs_file:
    raw_configs = json.load(configs_file)

configs = []
for raw_config in raw_configs:
    configs.append(ExperimentConfig.from_dict(raw_config))


In [5]:
# Manager object used below to load the stats of each configured experiment.
manager = ExperimentManager()


In [6]:
# Load the stats for every experiment config, in the same order as `configs`
# (later cells zip the two lists together). tqdm shows a progress bar because,
# per the captured output below, individual loads can take a few seconds.
stats_per_experiment = [manager.load_stats(config) for config in tqdm(configs)]


 33%|███▎      | 24/72 [00:32<01:05,  1.37s/it]

Files already downloaded and verified
Files already downloaded and verified


 35%|███▍      | 25/72 [00:35<01:26,  1.84s/it]

Files already downloaded and verified
Files already downloaded and verified


 36%|███▌      | 26/72 [00:38<01:39,  2.17s/it]

Files already downloaded and verified
Files already downloaded and verified


 38%|███▊      | 27/72 [00:41<01:48,  2.40s/it]

Files already downloaded and verified
Files already downloaded and verified


 39%|███▉      | 28/72 [00:44<01:52,  2.56s/it]

Files already downloaded and verified
Files already downloaded and verified


 40%|████      | 29/72 [00:47<01:54,  2.67s/it]

Files already downloaded and verified
Files already downloaded and verified


 42%|████▏     | 30/72 [00:50<01:55,  2.75s/it]

Files already downloaded and verified
Files already downloaded and verified


 43%|████▎     | 31/72 [00:53<01:54,  2.80s/it]

Files already downloaded and verified
Files already downloaded and verified


 44%|████▍     | 32/72 [00:56<01:53,  2.83s/it]

Files already downloaded and verified
Files already downloaded and verified


 46%|████▌     | 33/72 [00:59<01:51,  2.86s/it]

Files already downloaded and verified
Files already downloaded and verified


 47%|████▋     | 34/72 [01:02<01:49,  2.88s/it]

Files already downloaded and verified
Files already downloaded and verified


 49%|████▊     | 35/72 [01:05<01:47,  2.89s/it]

Files already downloaded and verified
Files already downloaded and verified


100%|██████████| 72/72 [01:08<00:00,  1.05it/s]


In [7]:
# Axis orderings shared by the result arrays below: position in these lists
# is the index along the dataset / algorithm axis.
dataset_fields = [
    DatasetType.MNIST, DatasetType.FASHION_MNIST, DatasetType.CIFAR10,
    DatasetType.SKLEARN_DIGITS, DatasetType.SKLEARN_IRIS, DatasetType.SKLEARN_CUSTOM,
]
algorithm_fields = [AlgorithmType.PCA, AlgorithmType.LLE, AlgorithmType.TSNE, AlgorithmType.UMAP]
num_trials = 3

# knn_num_neighbors_data: (dataset, algorithm, knn test)        -> number of neighbors
# knn_acc_data:           (dataset, algorithm, knn test, trial) -> kNN accuracy
results_shape = (len(dataset_fields), len(algorithm_fields), ExperimentStatsRunner.NUM_KNN_TESTS)
knn_num_neighbors_data = np.zeros(results_shape, dtype=np.int64)
knn_acc_data = np.zeros(results_shape + (num_trials,), dtype=np.float64)

for stats, config in zip(stats_per_experiment, configs):
    dataset_idx = dataset_fields.index(config.dataset_config.dataset_type)
    algorithm_idx = algorithm_fields.index(config.algorithm_config.algorithm_type)
    k = config.trial_index
    # stats.knn_accs is unpacked as a sequence of (num_neighbors, accuracy) pairs.
    neighbor_counts, accuracies = zip(*stats.knn_accs)
    # The neighbor counts are rewritten on every trial of the same (dataset, algorithm).
    knn_num_neighbors_data[dataset_idx, algorithm_idx] = np.asarray(neighbor_counts, dtype=np.int64)
    knn_acc_data[dataset_idx, algorithm_idx, :, k] = np.asarray(accuracies, dtype=np.float64)


In [8]:
# Write one LaTeX table body per dataset: a header row with the algorithm
# labels, then one row per kNN test giving "mean (± std)" accuracy over trials
# for each algorithm. Cells are joined with " & " so no row ends in a dangling
# column separator.
for dataset_idx, dataset_type in enumerate(dataset_fields):
    # Plain (non-raw) string so the file contains a single backslash: \textbf{k}.
    header_cells = ["\\textbf{k}"]
    for algorithm_type in algorithm_fields:
        header_cells.append(f"\\textbf{{{ALGORITHM_TO_LABEL[algorithm_type]}}}")
    rows = [header_cells]

    for test_idx in range(ExperimentStatsRunner.NUM_KNN_TESTS):
        # Row label: neighbor count recorded for the first algorithm. Assumes
        # every algorithm was evaluated with the same neighbor counts on a
        # given dataset — TODO confirm.
        num_neighbors = knn_num_neighbors_data[dataset_idx, 0, test_idx]
        row_cells = [f"{num_neighbors}"]
        for algorithm_idx in range(len(algorithm_fields)):
            # 1-D slice over the trial axis for this (dataset, algorithm, test).
            trial_accs = knn_acc_data[dataset_idx, algorithm_idx, test_idx]
            row_cells.append(
                "{:.2F} {{\\tiny ($\\pm$ {:.4f})}}".format(np.mean(trial_accs), np.std(trial_accs)))
        rows.append(row_cells)

    # Each row is terminated with the LaTeX row separator "\\" and a newline.
    table = "".join(" & ".join(cells) + " \\\\ \n" for cells in rows)

    fname = os.path.join(DEFAULT_OUTPUT_DIR, "knn_acc_table_{}.txt".format(dataset_type.value))
    with open(fname, 'w') as f:
        f.write(table)

In [9]:
# Save one error-bar plot per dataset: kNN accuracy (mean ± std over trials)
# against the number of neighbors, with one line per algorithm.
for i, dataset_type in enumerate(dataset_fields):
    fig, ax = plt.subplots()
    for j, algorithm_type in enumerate(algorithm_fields):
        x = knn_num_neighbors_data[i, j]
        # knn_acc_data[i, j] is (knn test, trial); reduce over the trial axis.
        acc_avgs = np.mean(knn_acc_data[i, j], axis=1)
        acc_stds = np.std(knn_acc_data[i, j], axis=1)
        ax.errorbar(x=x, y=acc_avgs, yerr=acc_stds, marker='o', capsize=10, label=ALGORITHM_TO_LABEL[algorithm_type])
    ax.set_title(f"{DATASET_TO_LABEL[dataset_type]} kNN Classification Performance")
    ax.set_xlabel("Num Neighbors")
    ax.set_ylabel("Accuracy")
    # Anchor the legend just outside the right edge of the axes.
    ax.legend(bbox_to_anchor=(1.04, 1), loc='upper left')
    fig.tight_layout()

    fname = os.path.join(DEFAULT_OUTPUT_DIR, "knn_acc_plot_{}.png".format(dataset_type.value))
    try:
        fig.savefig(fname)
    finally:
        # Close this specific figure once it has been written (or if saving
        # failed) so figures do not accumulate across loop iterations.
        plt.close(fig)


In [10]:
# For one chosen dataset, copy the embedding figure of the first matching
# experiment of each algorithm into the output directory under a stable name.
dataset_type = DatasetType.MNIST

for algorithm_type in algorithm_fields:
    fname = os.path.join(
        DEFAULT_OUTPUT_DIR,
        "embedding_plot_{}_{}.png".format(dataset_type.value, algorithm_type.value))
    # First experiment (in `configs` order) run on this dataset with this algorithm.
    matching_stats = next(
        (stats for stats, config in zip(stats_per_experiment, configs)
         if config.dataset_config.dataset_type == dataset_type
         and config.algorithm_config.algorithm_type == algorithm_type),
        None)
    if matching_stats is None:
        raise ValueError(
            "No experiment found for dataset {!r} with algorithm {!r}".format(
                dataset_type.value, algorithm_type.value))
    shutil.copyfile(matching_stats.fig_fname, fname)