In [1]:
import pandas as pd
import wandb
from tqdm.notebook import tqdm
import pickle
from os.path import exists
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import math
import ast
import scipy as sp
import scipy.stats as sps

from matplotlib.ticker import MaxNLocator
#...

# Font family and size applied to every figure in this notebook.
font = dict(family='times', size=14)

matplotlib.rc('font', **font)



In [2]:
class Experiment:
    """Eagerly captured view of one run object.

    The constructor copies ``name``, ``config``, ``summary`` and ``tags`` off
    ``run``, calls ``run.history()`` once and stores the result, and keeps a
    reference to ``run`` itself for anything not copied.
    """

    def __init__(self, run):
        self.name, self.config, self.summary = run.name, run.config, run.summary
        # history() is invoked here so later cells do not have to call it again.
        self.history = run.history()
        self.tags = run.tags
        self.run = run

    def get_id(self):
        """Return the ``(formula, mol_idx)`` pair read from the run config."""
        cfg = self.config
        return (cfg['formula'], cfg['mol_idx'])

    def get_history(self):
        """Return the running total of the ``additional_steps`` history column."""
        steps = list(self.history['additional_steps'])
        return np.cumsum(np.array(steps))

In [3]:
def fetch(project):
    """Load every run of a wandb project as an ``Experiment``.

    Parameters
    ----------
    project : str
        Project name; it is looked up under the hard-coded entity ``"bogp"``.

    Returns
    -------
    list of Experiment
        One entry per run that could be wrapped. Runs whose wrapping raises
        are skipped (best effort), but each skip is reported so that missing
        data further down the notebook can be traced back to its cause.
    """
    api = wandb.Api()
    entity = "bogp"
    hdata = []
    runs = api.runs(entity + "/" + project)
    for run in tqdm(runs):
        try:
            hdata.append(Experiment(run))
        except Exception as err:
            # Catch Exception rather than everything: a bare ``except`` would
            # also swallow KeyboardInterrupt and make the loop uninterruptible.
            run_name = getattr(run, "name", "<unknown>")
            print(f"Skipping run {run_name!r}: {type(err).__name__}: {err}")
    return hdata

In [None]:
# Download all runs of the "scale_master" project; later cells filter `raw` by run group.
raw = fetch(project="scale_master")

  0%|          | 0/300 [00:00<?, ?it/s]

# Width

In [None]:
# Index the runs of group "bayes4" by their particle count.
# If two runs share a particle count, the later one in `raw` wins.
exps = {}
for exp in raw:
    if exp.run.group != "bayes4":
        continue
    print(exp.name)
    exps[exp.config["num_particles"]] = exp
exps

In [None]:
def calc(exps, p):
    """Compute standard and Multi-SWAG accuracy for the run with ``p`` particles.

    Each distribution is a dict mapping a class index ``c`` (0..9) to a list of
    ten counts; ``dist[c][c]`` is treated as the correctly classified count and
    every other entry of ``dist[c]`` as a misclassification.
    (Presumably rows are true classes and columns predictions; verify against
    the code that logs ``orig_dist0`` / ``max_dist0``.)

    Parameters
    ----------
    exps : mapping
        Particle count -> experiment whose ``history`` exposes ``orig_dist0``
        and ``max_dist0`` columns holding dict literals as strings. It is not
        consulted for ``p == 32``, where both matrices are hard-coded.
    p : int
        Particle count to evaluate.

    Returns
    -------
    tuple of float
        ``(standard accuracy, Multi-SWAG accuracy)``. Each accuracy is
        normalised by the total of its own matrix.
    """
    if p == 32:
        # Both matrices are hard-coded for p == 32; the original author noted
        # that fetching this run did not work.
        d = {0: [968, 0, 0, 0, 0, 3, 5, 3, 1, 0], 1: [0, 1119, 2, 3, 0, 1, 2, 1, 7, 0], 2: [6, 1, 996, 8, 2, 1, 4, 6, 8, 0], 3: [0, 0, 1, 984, 0, 6, 0, 2, 13, 4], 4: [2, 0, 3, 0, 969, 1, 1, 2, 1, 3], 5: [5, 0, 1, 12, 1, 859, 10, 2, 2, 0], 6: [5, 2, 1, 0, 4, 3, 941, 0, 2, 0], 7: [2, 4, 6, 9, 0, 0, 0, 996, 4, 7], 8: [2, 1, 3, 4, 6, 3, 4, 1, 946, 4], 9: [4, 5, 6, 8, 30, 6, 0, 7, 4, 939]}
        d_swag = {0: [973, 0, 0, 0, 0, 1, 2, 1, 3, 0], 1: [0, 1123, 3, 2, 0, 1, 2, 1, 3, 0], 2: [6, 0, 1011, 0, 1, 1, 2, 7, 4, 0], 3: [0, 0, 3, 994, 0, 6, 0, 2, 3, 2], 4: [2, 0, 0, 0, 971, 1, 5, 1, 0, 2], 5: [3, 0, 1, 3, 1, 878, 4, 0, 1, 1], 6: [7, 2, 0, 0, 3, 2, 942, 0, 2, 0], 7: [3, 4, 9, 4, 1, 0, 0, 1003, 1, 3], 8: [4, 1, 2, 6, 3, 3, 1, 1, 953, 0], 9: [4, 6, 2, 5, 22, 4, 0, 8, 3, 955]}
    else:
        if p == 64:
            # Hard-coded for the same reason; only the standard matrix is
            # available here, the Multi-SWAG one is still read from `exps`.
            d = {0: [971, 0, 0, 2, 0, 2, 3, 1, 1, 0], 1: [0, 1120, 2, 3, 0, 2, 2, 3, 2, 1], 2: [6, 3, 994, 9, 0, 2, 1, 10, 7, 0], 3: [0, 0, 10, 977, 0, 10, 0, 5, 2, 6], 4: [4, 1, 4, 0, 946, 0, 5, 0, 0, 22], 5: [5, 0, 4, 11, 1, 862, 5, 1, 1, 2], 6: [7, 3, 2, 0, 4, 7, 932, 0, 3, 0], 7: [2, 2, 11, 6, 0, 0, 0, 995, 3, 9], 8: [2, 1, 5, 8, 5, 2, 1, 3, 941, 6], 9: [5, 2, 1, 6, 11, 4, 0, 6, 1, 973]}
        else:
            # NOTE(review): entry 11 of the history is a magic number; confirm
            # which logging step it corresponds to.
            d = ast.literal_eval(exps[p].history.orig_dist0[11])
        swag_log = exps[p].history.max_dist0
        # The Multi-SWAG matrix is taken from the last logged entry.
        d_swag = ast.literal_eval(swag_log[len(swag_log) - 1])

    classes = range(10)
    misclass = sum(sum(d[c]) - d[c][c] for c in classes)
    total = sum(sum(d[c]) for c in classes)
    misclass_swag = sum(sum(d_swag[c]) - d_swag[c][c] for c in classes)
    # Use the Multi-SWAG matrix's own total so its accuracy stays correct even
    # when the two matrices were computed on a different number of samples.
    total_swag = sum(sum(d_swag[c]) for c in classes)

    print("original misclass", misclass, "mswag misclass", misclass_swag)
    return 1 - (misclass / total), 1 - (misclass_swag / total_swag)

In [None]:
# Particle counts to compare; calc() returns (standard, Multi-SWAG) accuracy for each.
ps = [1, 2, 4, 8, 16, 32]
orig, mswag = map(list, zip(*(calc(exps, p) for p in ps)))

# Accuracy against particle count, one curve per training method.
plt.plot(ps, orig, label='Standard', marker='s', linestyle="--")
plt.plot(ps, mswag, label="Multi-Swag", marker='o', linestyle=":")
plt.xlabel("Particles")
plt.ylabel("Accuracy")
plt.title("Standard Training vs. Multi-Swag on MNIST")
plt.legend()

In [None]:
# Parameter count recorded in each run's config, in the same order as `ps`.
params = [exps[p].config["num_params"] for p in ps]
params

In [None]:
# Show the standard and Multi-SWAG accuracy lists side by side.
orig, mswag

In [None]:
# One row per particle count; the column order here is the order used in the exported table.
df = pd.DataFrame(dict([
    ("parameters", params),
    ("original accuracy", orig),
    ("particles", ps),
    ("mswag accuracy", mswag),
]))
df

In [None]:
# Write the width table to a LaTeX file, without the index column.
df.to_latex("table_width.tex", index=False)

# Depth

In [None]:
# Index the runs of group "bayes5" by their particle count.
# If two runs share a particle count, the later one in `raw` wins.
bayes5 = {}
for exp in raw:
    if exp.run.group != "bayes5":
        continue
    print(exp.name)
    bayes5[exp.config["num_particles"]] = exp
bayes5

In [None]:
def calc2(exps, p):
    """Compute standard and Multi-SWAG accuracy for the run with ``p`` particles.

    Each distribution is a dict mapping a class index ``c`` (0..9) to a list of
    ten counts; ``dist[c][c]`` is treated as the correctly classified count and
    every other entry of ``dist[c]`` as a misclassification.
    (Presumably rows are true classes and columns predictions; verify against
    the code that logs ``orig_dist0`` / ``max_dist0``.)

    Parameters
    ----------
    exps : mapping
        Particle count -> experiment whose ``history`` exposes ``orig_dist0``
        and ``max_dist0`` columns holding dict literals as strings. The
        Multi-SWAG matrix is always read from ``exps[p]``, including for
        ``p == 32``.
    p : int
        Particle count to evaluate.

    Returns
    -------
    tuple of float
        ``(standard accuracy, Multi-SWAG accuracy)``. Each accuracy is
        normalised by the total of its own matrix.
    """
    if p == 32:
        # The standard matrix for p == 32 is hard-coded instead of being read
        # from the run history.
        d = {0: [974, 0, 0, 0, 1, 0, 2, 1, 2, 0], 1: [0, 1126, 6, 0, 0, 1, 2, 0, 0, 0], 2: [4, 0, 1008, 4, 1, 1, 3, 7, 4, 0], 3: [0, 0, 8, 991, 0, 4, 0, 4, 3, 0], 4: [3, 1, 2, 0, 957, 1, 6, 0, 0, 12], 5: [3, 0, 2, 9, 0, 868, 8, 1, 0, 1], 6: [4, 3, 1, 1, 4, 6, 936, 0, 3, 0], 7: [2, 4, 11, 4, 0, 0, 0, 994, 2, 11], 8: [8, 0, 4, 3, 5, 3, 4, 2, 941, 4], 9: [5, 3, 0, 4, 8, 2, 1, 3, 3, 980]}
    else:
        # NOTE(review): entry 11 of the history is a magic number; confirm
        # which logging step it corresponds to.
        d = ast.literal_eval(exps[p].history.orig_dist0[11])
    swag_log = exps[p].history.max_dist0
    # The Multi-SWAG matrix is taken from the last logged entry.
    d_swag = ast.literal_eval(swag_log[len(swag_log) - 1])

    classes = range(10)
    misclass = sum(sum(d[c]) - d[c][c] for c in classes)
    total = sum(sum(d[c]) for c in classes)
    misclass_swag = sum(sum(d_swag[c]) - d_swag[c][c] for c in classes)
    # Use the Multi-SWAG matrix's own total so its accuracy stays correct even
    # when the two matrices were computed on a different number of samples.
    total_swag = sum(sum(d_swag[c]) for c in classes)

    print("original misclass", misclass, "mswag misclass", misclass_swag)
    return 1 - (misclass / total), 1 - (misclass_swag / total_swag)

In [None]:
# Parameter count recorded in each "bayes5" run's config, in the same order as `ps`.
params = [bayes5[p].config["num_params"] for p in ps]

# calc2() returns (standard, Multi-SWAG) accuracy for each particle count.
orig, mswag = map(list, zip(*(calc2(bayes5, p) for p in ps)))

# One row per particle count; the column order here is the order used in the exported table.
df = pd.DataFrame(dict([
    ("parameters", params),
    ("original accuracy", orig),
    ("particles", ps),
    ("mswag accuracy", mswag),
]))
df

In [None]:
# Write the depth table to a LaTeX file, without the index column.
df.to_latex("table_depth.tex", index=False)

# Plot

In [None]:
ps = [1, 2, 4, 8, 16, 32]
# NOTE(review): this cell passes the "bayes4" runs (`exps`) to calc2, which is
# defined in the Depth section and hard-codes its own p == 32 matrix. Confirm
# that calc2 (and not calc) is the intended function here.
orig = []
mswag = []
for p in ps:
    m1, m2 = calc2(exps, p)
    orig += [m1]
    mswag += [m2]
fig, ax = plt.subplots()
ax.plot(ps, mswag, label="Multi-Swag", marker='o', linestyle=":")
ax.plot(ps, orig, label='Standard', marker='s', linestyle="--" )

# Parameter count of each plotted run, in the same order as `ps`.
num_params = [exps[p].config["num_params"] for p in ps]
if any(b <= a for a, b in zip(num_params, num_params[1:])):
    # Without a strictly increasing mapping the top axis has no valid inverse.
    raise ValueError(
        "num_params must increase strictly with the particle count "
        "to be shown as a secondary axis"
    )

params_to_p = dict(zip(num_params, ps))


def _linear_map(x, xp, fp):
    """Piecewise-linear map through the points (xp, fp), extrapolated past both ends.

    matplotlib evaluates secondary-axis functions on NumPy arrays, including
    values outside the data range (the axis limits carry margins), so the map
    must be vectorised and stay invertible beyond the first and last point.
    """
    x = np.asarray(x, dtype=float)
    inside = np.interp(x, xp, fp)
    below = fp[0] + (x - xp[0]) * (fp[1] - fp[0]) / (xp[1] - xp[0])
    above = fp[-1] + (x - xp[-1]) * (fp[-1] - fp[-2]) / (xp[-1] - xp[-2])
    return np.where(x < xp[0], below, np.where(x > xp[-1], above, inside))


def foo(x):
    """Map particle counts to parameter counts (forward transform of the top axis)."""
    return _linear_map(x, ps, num_params)


def foo_inv(x):
    """Map parameter counts back to particle counts (inverse transform of the top axis)."""
    return _linear_map(x, num_params, ps)


secax = ax.secondary_xaxis('top', functions=(foo, foo_inv))


ax.set_xlabel("Particles")
ax.set_ylabel("Accuracy")
ax.set_title("Standard Training vs. Multi-Swag on MNIST")
ax.legend()