In [1]:
%load_ext autoreload
%autoreload 2

%config InlineBackend.figure_format = "retina"

from IPython.core.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))


  from IPython.core.display import display, HTML


In [1]:
import pandas as pd
import os
import argparse
from typing import Any, Callable, Sequence
from pathlib import Path
from PIL import Image
import matplotlib.image as mpimg

import jax
import jax.random as jr
import jax.numpy as jnp
from jax import grad, jit, vmap
from jax.flatten_util import ravel_pytree
import numpy as np
import matplotlib.pyplot as plt
from flax import linen as nn


from bong.util import run_rebayes_algorithm, gaussian_kl_div, MLP
from bong.src import bbb, blr, bog, bong, experiment_utils
from bong.agents import AGENT_NAMES
from plot_utils import *



In [4]:
# Seven values spanning 1e-4 to 1e-1; printed for a quick visual check.
x = np.array(
    [
        1e-4,
        5e-4,
        1e-3,
        5*1e-3,
        1e-2,
        5*1e-2,
        1e-1,
    ]
)
print(x)

[0.0001 0.0005 0.001  0.005  0.01   0.05   0.1   ]


In [17]:
def _str2bool(value):
    """Convert a command-line string such as "true"/"False"/"1"/"0" to a bool.

    `type=bool` must not be used with argparse: `bool("False")` is `True`
    because any non-empty string is truthy, so the flag could never be
    switched off from the command line.

    Args:
        value: The raw argument (a string from the command line, or a bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: If `value` is not a recognised boolean
            spelling.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in {"true", "t", "yes", "y", "1"}:
        return True
    # The empty string is kept as False, matching what `bool("")` returned.
    if text in {"false", "f", "no", "n", "0", ""}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser()

# Data parameters
parser.add_argument("--dataset", type=str, default="reg")
parser.add_argument("--data_dim", type=int, default=10)
parser.add_argument("--data_key", type=int, default=0)
parser.add_argument("--dgp_type", type=str, default="lin") # or mlp
parser.add_argument("--dgp_neurons", type=int, nargs="+", default=[20, 20, 1])
parser.add_argument("--dgp_neurons_str", type=str, default="") # 20_20_1
parser.add_argument("--emission_noise", type=float, default=1.0)
parser.add_argument("--ntrain", type=int, default=500)
parser.add_argument("--nval", type=int, default=500)
parser.add_argument("--ntest", type=int, default=1000)
parser.add_argument("--add_ones", type=_str2bool, default=False)


# Model parameters
parser.add_argument("--agent", type=str, default="bong_fc", choices=AGENT_NAMES)
parser.add_argument("--agent_key", type=int, default=0)
parser.add_argument("--lr", type=float, default=0.01)
parser.add_argument("--niter", type=int, default=10)
parser.add_argument("--nsample", type=int, default=10)
parser.add_argument("--ef", type=int, default=0)
parser.add_argument("--linplugin", type=int, default=0)
parser.add_argument("--rank", type=int, default=10)
parser.add_argument("--model_type", type=str, default="lin") # or mlp
parser.add_argument("--model_neurons", type=int, nargs="+", default=[1])
parser.add_argument("--model_neurons_str", type=str, default="")
parser.add_argument("--init_var", type=float, default=1.0)
parser.add_argument("--algo_key", type=int, default=0)

# results
parser.add_argument("--dir", type=str, default="", help="directory to store results")
parser.add_argument("--debug", type=_str2bool, default=False)


# Parse an empty argument list so that every option takes its default value
# (the notebook has no real command line to read).
args = parser.parse_args([])

In [18]:
from datasets import *
from models import *
from bong.agents import *
from run_job import *

# Build the dataset from the parsed arguments and show the training-input shape.
data = make_dataset(args)
print(data['X_tr'].shape)

# Build the model for that dataset.
model = make_model(args, data)

# Look up the constructor registered for the selected agent, then instantiate
# it with the model's keyword arguments plus the agent options taken from `args`.
constructor = AGENT_DICT[args.agent]['constructor']
agent_options = dict(
    agent_key=args.agent_key,
    learning_rate=args.lr,
    num_iter=args.niter,
    num_samples=args.nsample,
    linplugin=args.linplugin,
    empirical_fisher=args.ef,
    rank=args.rank,
)
agent = constructor(**model['model_kwargs'], **agent_options)

# Run the agent with a PRNG key seeded from `args.agent_key`.
key = jr.PRNGKey(args.agent_key)
results, elapsed = run_agent(key, agent, data, model)
df = pd.DataFrame(results)

# Non time-series data describing this run.
attributes = parse_agent_full_name(agent.name)
meta = dict(
    data_name=data['name'],
    model_name=model['name'],
    agent_name=agent.name,
    algo=attributes['algo'],
    param=attributes['param'],
    elapsed=elapsed,
)


(500, 10)
Running bong_fc-MC10-I1-LR0-EF1-Lin0 + mlp_10_1 on reg-D10-lin_1
Using GPU of type:  None
Time 15.75s
Test NLL: 1.5503,  NLPD: 1.5489
Val NLL 1.5632,  NLPD: 1.5948


In [4]:
# `args` produced by the parser cell is an argparse.Namespace, which has no
# `.keys()` method; convert it with vars() so this cell also runs on a fresh
# kernel, while still accepting a plain dict of arguments.
args_view = args if isinstance(args, dict) else vars(args)
print(args_view.keys())

dict_keys(['dataset', 'data_dim', 'data_key', 'dgp_type', 'dgp_neurons', 'dgp_neurons_str', 'emission_noise', 'ntrain', 'nval', 'ntest', 'add_ones', 'agent', 'agent_key', 'lr', 'niter', 'nsample', 'ef', 'linplugin', 'rank', 'model_type', 'model_neurons', 'model_neurons_str', 'use_bias', 'init_var', 'algo_key', 'dir', 'debug', 'data_name', 'model_name', 'agent_name', 'algo', 'param', 'elapsed', 'summary'])


In [13]:
from process_jobs import *

# Path components of the results directory for one data/model/agent combination.
root_dir = '/teamspace/studios/this_studio/jobs'
data_dir = 'reg-D10-mlp_20_20_1'
model_dir = 'mlp_1'
agent_dir = 'A:bong-P:fc-Lin:0-LR:0_01-IT:10-MC:100-EF:0-R:0/'

# NOTE: the name `dir` shadows the Python builtin `dir()` from here on.
dir = '/'.join((root_dir, data_dir, model_dir, agent_dir))

def create_results_summary(dir):
    """Collect the per-job metadata of one experiment directory into a DataFrame.

    Reads ``{dir}/jobs.csv`` to obtain the job names (column ``jobname``), then
    loads ``{dir}/{jobname}/work/args.json`` for *every* job and keeps the
    fields ``agent_name``, ``model_name``, ``data_name``, ``elapsed`` and
    ``summary``.

    Args:
        dir: Path of the experiment directory (string or path-like). The
            parameter name shadows the builtin ``dir`` inside this function;
            it is kept so existing callers continue to work.

    Returns:
        A DataFrame with one row per job and the columns ``jobname``,
        ``agent_name``, ``model_name``, ``data_name``, ``elapsed``,
        ``summary`` (in that order). If ``jobs.csv`` lists no jobs, an empty
        DataFrame with those columns is returned.

    Raises:
        FileNotFoundError: If ``jobs.csv`` or a job's ``args.json`` is missing.
        KeyError: If ``jobs.csv`` has no ``jobname`` column, or an
            ``args.json`` lacks one of the kept fields.
    """
    # Local import: `json` is not among the notebook's top-level imports.
    import json

    columns = ['jobname', 'agent_name', 'model_name', 'data_name', 'elapsed', 'summary']
    kept_fields = columns[1:]

    root = Path(dir)
    jobnames = pd.read_csv(root / "jobs.csv")['jobname']

    # Dict of dicts keyed by job name, holding the meta-results of each job.
    # Everything below must run once per job, i.e. inside the loop.
    meta = {}
    for jobname in jobnames:
        args_path = root / str(jobname) / "work" / "args.json"
        with open(args_path, 'r') as json_file:
            job_args = json.load(json_file)
        record = {'jobname': jobname}
        record.update({field: job_args[field] for field in kept_fields})
        meta[jobname] = record

    # Build the frame in one step rather than concatenating row by row;
    # `columns=` fixes the column order and also covers the empty case.
    return pd.DataFrame(list(meta.values()), columns=columns)

    
# Summarise the jobs stored under `dir` and preview the first rows.
# Note: this rebinds the notebook-level name `df`.
df = create_results_summary(dir)
df.head()





Unnamed: 0,jobname,agent_name,model_name,data_name,elapsed,summary
0,job0-00,bong_fc-MC100-I1-LR0-EF0-Lin0,mlp_1,reg-D10-mlp_20_20_1,1.406131,"Test NLL: 1.4352, NLPD: 1.4501\nVal NLL 1.436..."


In [12]:
# Merge dict of dicts into a dataframe.
# NOTE(review): this cell expects `meta` to map each job name to a dict of
# metadata fields; make sure such a `meta` is defined before running it.
summary_columns = ['jobname', 'agent_name', 'model_name', 'data_name', 'elapsed', 'summary']

# One record per job; the outer key becomes the `jobname` field. Building the
# frame once from a list of records avoids the quadratic concat-in-a-loop, and
# `columns=` selects and orders the columns (and handles an empty `meta`).
job_records = [{**value, 'jobname': key} for key, value in meta.items()]
df = pd.DataFrame(job_records, columns=summary_columns)

df.head()

Unnamed: 0,jobname,agent_name,model_name,data_name,elapsed,summary
0,job0-00,bong_fc-MC100-I1-LR0-EF0-Lin0,mlp_1,reg-D10-mlp_20_20_1,1.406131,"Test NLL: 1.4352, NLPD: 1.4501\nVal NLL 1.436..."
