In [None]:
# default_exp testing_utils
from nbdev.showdoc import *
import numpy as np
import matplotlib.pyplot as plt
import torch
import FRED
# Pick the fastest available backend: CUDA first, then Apple-silicon MPS, then CPU.
# MPS support is probed through `torch.backends.mps` instead of matching the version
# string, so every PyTorch build that ships the MPS backend is detected; builds
# without it simply fall through to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print("Using device", device)
%load_ext autoreload
%autoreload 2

Using device mps
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Comprehensive Double Helix Benchmarks on FRED

Explanation: this notebook prepares a `papermill`-powered test of the toy dataset benchmarker, using the noisy double helix as our test subject.

Here's what it does:
1. Start with a dict of possible parameters, each contained in a list, like this:

In [None]:
import papermill as pm

In [None]:
# Grid of candidate values: every key maps to the list of values to sweep over.
parameters_dict = dict(
    notebook=['05b Toy Dataset Benchmarker.ipynb'],  # the notebook these parameters get injected into
    dataset_name=['noisy double helix'],
    sigma=[1],
    flow_strength=[1, 2, 5],
    smoothness_weight=[0],
    flow_neighbor_loss_weight=[1, 5, 10],
    diffdist_weight=[1, 5, 10],
    num_neighbors=[5],
)

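2. Use this function to write out one scenario for each possible combination of these parameters: either a JSON file containing one dictionary per combination (`filetype='json'`), or a job-list text file with one `papermill` command per line (`filetype='dsq'`, the default).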

In [None]:
# export
import json
import os

counter = 0  # module-level run counter; keeps run numbers unique across successive calls
def parameters_list_from_dict(parameters_dict, outfile, filetype = "dsq", kernel_name = 'FREDkernel_jupyter', preamble = 'module load miniconda; conda activate FREDkernel;'):
    ''' Expand a grid of parameter values into one scenario per combination and write the scenarios to `outfile`.

    Every scenario is a dict holding one value per key of `parameters_dict`, plus an
    `output_label` entry: a descriptor made of `run<N>` (N comes from the module-level
    `counter`) followed by the scenario's key-value pairs with punctuation stripped.

    Parameters
    ----------
    parameters_dict : dict mapping each parameter name to an iterable of candidate values.
        Must contain a `notebook` key (the notebook the parameters are injected into).
    outfile : path of the file to write.
    filetype : `'json'` or `'dsq'`.
        `'json'` (re)writes `outfile` as a single JSON object mapping each descriptor to its
        scenario dict; any previous content is replaced so the file is always valid JSON.
        `'dsq'` appends one line per scenario to `outfile`, each of the form
        `<preamble> papermill "<notebook>" "<output notebook>" -k <kernel_name> -y "<scenario dict>"`,
        and creates the `papermilled/<notebook name>` output directory if needed.
    kernel_name : kernel passed to papermill's `-k` option (dsq mode only).
    preamble : shell text placed in front of every papermill command (dsq mode only).

    Raises
    ------
    ValueError : if `filetype` is neither `'json'` nor `'dsq'`.
    KeyError : if `parameters_dict` has no `notebook` key.

    Prints the number of scenarios created by this call.
    '''
    from itertools import product  # local import keeps this exported cell self-contained

    global counter
    if filetype not in ('json', 'dsq'):
        # Previously an unknown filetype silently produced no output at all.
        raise ValueError(f"Unsupported filetype {filetype!r}; expected 'json' or 'dsq'.")

    keys = list(parameters_dict.keys())
    json_entries = []
    dsq_lines = []
    # product() walks the combinations with the first key outermost and the last key
    # innermost, i.e. in the same order as nested for-loops over the keys.
    for values in product(*(parameters_dict[key] for key in keys)):
        little_dict = dict(zip(keys, values))
        counter += 1
        # Flatten the scenario into a compact label: drop the notebook name and all
        # punctuation, join the remaining key-value pairs with '_' and '-'.
        descriptor = f"run{counter}" + str(little_dict).replace(little_dict['notebook'],'').replace(' ','').replace("'","").replace(':','-').replace('{','').replace('}','').replace('.','').replace(',','_').replace('notebook-','')
        little_dict['output_label'] = descriptor
        if filetype == 'json':
            # json.dumps on the key escapes any character that would break the JSON syntax.
            json_entries.append('\n' + json.dumps(descriptor) + ':' + json.dumps(little_dict, indent=4))
        else:
            in_notebook = little_dict['notebook']
            directory = 'papermilled/'+in_notebook.replace('.ipynb','')
            os.makedirs(directory, exist_ok=True)
            out_notebook = directory + "/" + descriptor + '.ipynb'
            # NOTE(review): the scenario dict is embedded inside double quotes for the shell;
            # values containing '"' or '$' would need escaping.
            command = f'{preamble} papermill "{in_notebook}" "{out_notebook}" -k {kernel_name} -y "{str(little_dict)}"'
            dsq_lines.append(f"{command}\n")

    if filetype == 'json':
        # Write the whole object in one go: no trailing comma to strip afterwards, and an
        # empty grid yields '{}' rather than a broken file.
        with open(outfile, 'w') as f:
            f.write('{' + ','.join(json_entries) + '}')
    elif dsq_lines:
        # Job files are appended to, so several grids can share a single job list.
        with open(outfile, 'a') as f:
            f.writelines(dsq_lines)
    print(f"Created {len(json_entries) + len(dsq_lines)} test scenarios.")
            

In [None]:
# Write one papermill job line per parameter combination to the dsq job file.
parameters_list_from_dict(
    parameters_dict,
    outfile='AUNT_HILLARY_REBORN.txt',
    filetype='dsq',
)

Created 27 test scenarios.


3. Run the `run_nb_batch.py` Python script, with the JSON file of scenarios specified via the `--config_file` argument.

In [None]:
!python run_nb_batch.py --config_file noisy_double_helix_benchmark.json --run_mode parallel

^C


In [None]:
# A single scenario (one scalar value per key), used below to show how a descriptor is built.
parameters_dict = dict(
    notebook='05b Toy Dataset Benchmarker.ipynb',  # the notebook these parameters get injected into
    dataset_name='noisy double helix',
    sigma=1,
    flow_strength=1,
    smoothness_weight=0,
    flow_neighbor_loss_weight=1,
    diffdist_weight=1,
    num_neighbors=5,
)

In [None]:
# Raw string form of the scenario dict, as embedded in the papermill command.
str(parameters_dict)

"{'notebook': '05b Toy Dataset Benchmarker.ipynb', 'dataset_name': 'noisy double helix', 'sigma': 1, 'flow_strength': 1, 'smoothness_weight': 0, 'flow_neighbor_loss_weight': 1, 'diffdist_weight': 1, 'num_neighbors': 5}"

In [None]:
# The clean-up chain that turns the scenario dict into a compact descriptor:
# drop the notebook name and punctuation, join key-value pairs with '_' and '-'.
(
    str(parameters_dict)
    .replace(parameters_dict['notebook'], '')
    .replace(' ', '')
    .replace("'", "")
    .replace(':', '-')
    .replace('{', '')
    .replace('}', '')
    .replace('.', '')
    .replace(',', '_')
    .replace('notebook-', '')
)

'_dataset_name-noisydoublehelix_sigma-1_flow_strength-1_smoothness_weight-0_flow_neighbor_loss_weight-1_diffdist_weight-1_num_neighbors-5'