# Graph Experiment 2: Experiment Preparation
Set up experiments for the bots to do.

In [None]:
%load_ext autoreload
%autoreload 2

## Capture code as strings
We collect code from chosen cells as strings, and place them in nodes in the graph. The code is intended to be sufficient to reproduce the results.

In [None]:
# Collects the source text of selected cells, in the order they are captured.
# The list is later handed to gu.create_a_procedure as the procedure's code.
code_strings = []

In [None]:
import numpy as np
from nnbench import NetMaker, NNMEG
import secrets
import time
import tools.neotools as nj

In [None]:
# ``In`` is IPython's input history: In[-1] is this cell, so In[-2] is the cell
# that was executed just before it. Run the cells in order, otherwise the
# wrong source text is captured.
code_strings.append(In[-2]) # Grab the cell above (the imports)

In [None]:
from collections import defaultdict
import neo4j
import grexutils as gu

## Connecting

In [None]:
# Open a driver for the Neo4j server at host "neo4j", port 7687.
# NOTE(review): the username/password are hard-coded -- presumably a throwaway
# development database; confirm before sharing this notebook.
driver = neo4j.GraphDatabase.driver("neo4j://neo4j:7687", auth=("neo4j", "test"))

In [None]:
# Check now that the database can be reached, rather than finding out in the
# middle of an experiment run.
driver.verify_connectivity()

# The graph database

In [None]:
# Render the SVG file inline in the notebook.
# NOTE(review): judging by its name, the file is a diagram of the graph
# database structure; the path is relative, so it presumably has to sit in the
# notebook's working directory -- confirm.
from IPython.display import SVG, display
display(SVG('ml graphdb structure r2.svg'))

# Prepare code for later `exec`

In [None]:
# Seed NumPy's global RNG. The Parameters setup further down prepends a
# ``random_seed = ...`` line to each trial's code; when this cell runs without
# that name defined (e.g. interactively), fall back to a fixed seed of 0.
try:
    np.random.seed(random_seed)
except NameError:
    np.random.seed(0)

In [None]:
# Capture the RNG-seeding cell above (In[-2] is the previously executed cell).
code_strings.append(In[-2])

## Create the net we will train

In [None]:
net_shorthand = '1x8tx8tx3tx3t'
# NOTE: this cell is deliberately NOT appended to code_strings, so workers never run it.
# It is only a stand-in that keeps the next cell from raising NameError in this notebook.
# For workers, the Parameters node's prepend code strings provide net_shorthand.

In [None]:
# Build the net from its shorthand description; net_shorthand comes from the
# stand-in cell above here, and from the Parameters node's prepended code on
# a worker.
mnm = NetMaker(NNMEG)
#net = mnm('1x8tx8tx3tx3t')
net = mnm(net_shorthand)

def adc(input):
    """Quantize ``input`` to a 3-bit code with bits encoded as -1 / +1.

    ``8 * input`` is truncated to an integer level and clamped to 0..7, so
    values below 0 map to level 0 and values of 1 or more map to level 7.

    Args:
        input: A real scalar or a one-element array (``vadc`` passes rows of
            shape ``(1,)``). The name shadows the builtin but is kept so that
            existing keyword callers keep working.

    Returns:
        numpy.ndarray: Shape ``(3,)``, most significant bit first, with each
        bit mapped 0 -> -1 and 1 -> +1.
    """
    # Pull out a plain Python scalar first: calling int() on an array with
    # ndim > 0 is deprecated since NumPy 1.25.
    level = np.asarray(input).item()
    m = max(0, min(7, int(8 * level)))
    bits = np.array([(m >> 2) & 1, (m >> 1) & 1, m & 1])
    return bits * 2 - 1

def vadc(v):
    """Apply ``adc`` to each entry of ``v`` and stack the resulting codes."""
    return np.array([adc(sample) for sample in v])


# 64 evenly spaced inputs in [0, 1), as a column: 8 points in each output region.
x = np.arange(0, 1, 1.0 / (8 * 8)).reshape(-1, 1)
# (inputs, target codes) pair used for both training and loss evaluation.
training_batch = (x, vadc(x))

In [None]:
# Capture the cell above, which builds the net and the training batch
# (In[-2] is the previously executed cell).
code_strings.append(In[-2])

## First net node

In [None]:
def add_net_start_to_graph(driver, facts):
    """Create the first ``Net`` node of a run, hung off its ``Parameters`` node.

    Args:
        driver: Database driver, passed through to ``nj.query_write``.
        facts: Query parameters. The query reads ``parameters_unikey``,
            ``shorthand``, ``ksv``, ``loss`` and ``head``. A ``'unikey'``
            entry, if present, replaces the generated key.

    Returns:
        The ``unikey`` written to the new node.
    """
    cypher = """
MATCH (par:Parameters {unikey: $parameters_unikey})
CREATE (par)-[:CONFIGURES]->(:Net 
            {shorthand: $shorthand,
                unikey: $unikey,
                   ksv: $ksv,
                  loss: $loss,
                    ts: timestamp(),
                  head: $head,
    batches_from_start: 0})
"""
    # Start from a fresh random key, then let the caller's facts fill in
    # (or override) the rest.
    query_args = {'unikey': secrets.token_urlsafe(16), **facts}
    nj.query_write(driver, cypher, **query_args)
    return query_args['unikey']

In [None]:
# Capture the add_net_start_to_graph cell above (In[-2] is the previously
# executed cell).
code_strings.append(In[-2])

## Trained net nodes

In [None]:
def add_net_subsequent_to_graph(driver, facts):
    """Record one training segment: a ``LEARNED`` edge to a new ``Net`` node.

    Args:
        driver: Database driver, passed through to ``nj.query_write``.
        facts: Query parameters. The query reads ``prior_unikey`` (the node
            to extend from), ``batch_points``, ``etas``,
            ``eta_change_batches``, ``batches_this_segment``, ``loss``,
            ``loss_step``, ``traj_L2_sq``, ``traj_cos_sq_signed``,
            ``shorthand``, ``ksv``, ``end_loss`` and ``batches_from_start``.
            A ``'unikey'`` entry, if present, replaces the generated key.

    Returns:
        The ``unikey`` written to the new node.
    """
    cypher = """
MATCH (a:Net {unikey: $prior_unikey})
CREATE (a)-[:LEARNED
         {batch_points: $batch_points,
                  etas: $etas,
    eta_change_batches: $eta_change_batches,
  batches_this_segment: $batches_this_segment,
                losses: $loss,
            loss_steps: $loss_step,
           traj_L2_sqs: $traj_L2_sq,
   traj_cos_sq_signeds: $traj_cos_sq_signed,
                    ts: timestamp()}]->
(b:Net
            {shorthand: $shorthand,
                unikey: $unikey,
                   ksv: $ksv,
                  loss: $end_loss,
                    ts: timestamp(),
    batches_from_start: $batches_from_start})
"""
    # Start from a fresh random key, then let the caller's facts fill in
    # (or override) the rest.
    query_args = {'unikey': secrets.token_urlsafe(16), **facts}
    nj.query_write(driver, cypher, **query_args)
    return query_args['unikey']

In [None]:
# Capture the add_net_subsequent_to_graph cell above (In[-2] is the previously
# executed cell).
code_strings.append(In[-2])

## Train, recording trajectory

In [None]:
def train_net_an_increment(net, nps, properties):
    """Train ``net`` on ``training_batch`` and yield one record per segment.

    Training continues until the loss is at most 1e-3. A segment ends once
    the loss has fallen to 0.7071 of its value at the start of the segment,
    or once 100 trajectory samples have been taken in it. ``net.deltas()`` is
    sampled on every batch for the first 100 batches and on every 100th batch
    after that, plus once at the end of a segment if its last batch was not
    sampled.

    Args:
        net: Network being trained. This function uses its ``losses``,
            ``learn``, ``deltas``, ``eta``, ``shorthand`` and
            ``state_vector`` members.
        nps: Store whose ``store(...)`` return value is recorded as ``'ksv'``.
        properties: Ignored -- the name is rebound for every segment. Kept so
            the signature stays unchanged.

    Yields:
        dict: One list per field of the ``net.deltas()`` result, plus
        ``batch_points``, ``etas``, ``eta_change_batches``,
        ``batches_this_segment``, ``ts``, ``shorthand``, ``ksv``,
        ``end_loss`` and ``experiment``.
    """
    loss = net.losses([training_batch])[0]
    batch_ctr = 0
    while loss > 1e-3:
        batch_ctr_at_seg_start = batch_ctr
        etas = []    # (batch at which eta took a new value, that eta)
        deltas = []  # (batch, net.deltas()) trajectory samples
        prior_loss = loss
        while loss / prior_loss > 0.7071 and len(deltas) < 100:
            if not etas or net.eta != etas[-1][1]:
                etas.append((batch_ctr, net.eta))
            loss = net.learn([training_batch])
            if batch_ctr < 100 or batch_ctr % 100 == 0:
                deltas.append((batch_ctr, net.deltas()))
            batch_ctr += 1
        # Make sure the state at the end of the segment is sampled too.
        if not deltas or deltas[-1][0] < (batch_ctr - 1):
            deltas.append((batch_ctr, net.deltas()))
        # Transpose the samples into one list per field of the deltas record.
        samples = [sample for _, sample in deltas]
        properties = {field: list(series)
                      for field, series in zip(samples[0]._fields, zip(*samples))}
        properties['batch_points'] = [batch for batch, _ in deltas]
        # etas holds (batch, eta) pairs, so the first transposed column is the
        # batch numbers and the second the eta values. These two used to be
        # stored under each other's key.
        eta_change_batches, eta_values = (list(column) for column in zip(*etas))
        properties['etas'] = eta_values
        properties['eta_change_batches'] = eta_change_batches
        properties['batches_this_segment'] = batch_ctr - batch_ctr_at_seg_start
        properties['ts'] = time.time()
        properties['shorthand'] = net.shorthand
        properties['ksv'] = nps.store(net.state_vector())
        properties['end_loss'] = net.losses([training_batch])[0]
        properties['experiment'] = 'ADC'
        yield properties

In [None]:
def trainer(net, nps):
    """Train ``net`` on ``training_batch`` for three segments, yielding each.

    A segment ends once the loss has fallen to 0.7071 of its value at the
    start of the segment, or once 100 trajectory samples have been taken in
    it. ``net.deltas()`` is sampled on every batch for the first 100 batches
    and on every 100th batch after that, plus once at the end of a segment if
    its last batch was not sampled.

    Args:
        net: Network being trained. This function uses its ``losses``,
            ``learn``, ``deltas``, ``eta``, ``shorthand`` and
            ``state_vector`` members.
        nps: Store whose ``store(...)`` return value is recorded as ``'ksv'``.

    Yields:
        dict: One list per field of the ``net.deltas()`` result, plus
        ``batch_points``, ``etas``, ``eta_change_batches``,
        ``batches_this_segment``, ``ts``, ``shorthand``, ``ksv``,
        ``end_loss`` and ``batches_from_start``.
    """
    loss = net.losses([training_batch])[0]
    batch_ctr = 0
    for _ in range(3):
        batch_ctr_at_seg_start = batch_ctr
        etas = []    # (batch at which eta took a new value, that eta)
        deltas = []  # (batch, net.deltas()) trajectory samples
        prior_loss = loss
        while loss / prior_loss > 0.7071 and len(deltas) < 100:
            if not etas or net.eta != etas[-1][1]:
                etas.append((batch_ctr, net.eta))
            loss = net.learn([training_batch])
            if batch_ctr < 100 or batch_ctr % 100 == 0:
                deltas.append((batch_ctr, net.deltas()))
            batch_ctr += 1
        # Make sure the state at the end of the segment is sampled too.
        if not deltas or deltas[-1][0] < (batch_ctr - 1):
            deltas.append((batch_ctr, net.deltas()))
        # Transpose the samples into one list per field of the deltas record.
        samples = [sample for _, sample in deltas]
        properties = {field: list(series)
                      for field, series in zip(samples[0]._fields, zip(*samples))}
        properties['batch_points'] = [batch for batch, _ in deltas]
        # etas holds (batch, eta) pairs, so the first transposed column is the
        # batch numbers and the second the eta values. These two used to be
        # stored under each other's key.
        eta_change_batches, eta_values = (list(column) for column in zip(*etas))
        properties['etas'] = eta_values
        properties['eta_change_batches'] = eta_change_batches
        properties['batches_this_segment'] = batch_ctr - batch_ctr_at_seg_start
        properties['ts'] = time.time()
        properties['shorthand'] = net.shorthand
        properties['ksv'] = nps.store(net.state_vector())
        properties['end_loss'] = net.losses([training_batch])[0]
        properties['batches_from_start'] = batch_ctr
        yield properties

In [None]:
def trainer(net, nps):
    """Train ``net`` on ``training_batch`` and yield one record per segment.

    A new segment is started while the loss is above 1e-3 and fewer than
    100,000 batches have been run. A segment ends once the loss has fallen to
    0.7071 of its value at the start of the segment, or once 100 trajectory
    samples have been taken in it. ``net.deltas()`` is sampled on every batch
    for the first 100 batches and on every 100th batch after that, plus once
    at the end of a segment if its last batch was not sampled.

    Args:
        net: Network being trained. This function uses its ``losses``,
            ``learn``, ``deltas``, ``eta``, ``shorthand`` and
            ``state_vector`` members.
        nps: Store whose ``store(...)`` return value is recorded as ``'ksv'``.

    Yields:
        dict: One list per field of the ``net.deltas()`` result, plus
        ``batch_points``, ``etas``, ``eta_change_batches``,
        ``batches_this_segment``, ``ts``, ``shorthand``, ``ksv``,
        ``end_loss`` and ``batches_from_start``.
    """
    loss = net.losses([training_batch])[0]
    batch_ctr = 0
    while loss > 1e-3 and batch_ctr < 100_000:
        batch_ctr_at_seg_start = batch_ctr
        etas = []    # (batch at which eta took a new value, that eta)
        deltas = []  # (batch, net.deltas()) trajectory samples
        prior_loss = loss
        while loss / prior_loss > 0.7071 and len(deltas) < 100:
            if not etas or net.eta != etas[-1][1]:
                etas.append((batch_ctr, net.eta))
            loss = net.learn([training_batch])
            if batch_ctr < 100 or batch_ctr % 100 == 0:
                deltas.append((batch_ctr, net.deltas()))
            batch_ctr += 1
        # Make sure the state at the end of the segment is sampled too.
        if not deltas or deltas[-1][0] < (batch_ctr - 1):
            deltas.append((batch_ctr, net.deltas()))
        # Transpose the samples into one list per field of the deltas record.
        samples = [sample for _, sample in deltas]
        properties = {field: list(series)
                      for field, series in zip(samples[0]._fields, zip(*samples))}
        properties['batch_points'] = [batch for batch, _ in deltas]
        # etas holds (batch, eta) pairs, so the first transposed column is the
        # batch numbers and the second the eta values. These two used to be
        # stored under each other's key.
        eta_change_batches, eta_values = (list(column) for column in zip(*etas))
        properties['etas'] = eta_values
        properties['eta_change_batches'] = eta_change_batches
        properties['batches_this_segment'] = batch_ctr - batch_ctr_at_seg_start
        properties['ts'] = time.time()
        properties['shorthand'] = net.shorthand
        properties['ksv'] = nps.store(net.state_vector())
        properties['end_loss'] = net.losses([training_batch])[0]
        properties['batches_from_start'] = batch_ctr
        yield properties

In [None]:
def trainer(net, nps):
    """Train ``net`` on ``training_batch`` in growing segments, yielding each.

    The nominal segment length starts at 1.0 and is multiplied by the global
    ``batch_delta_factor`` before every segment; its integer part is the
    number of batches run. New segments are started while fewer than
    ``batch_limit`` batches have been run. ``net.deltas()`` is sampled on
    every batch for the first 100 batches and on every 100th batch after
    that, plus once at the end of a segment if its last batch was not sampled.

    Args:
        net: Network being trained. This function uses its ``losses``,
            ``learn``, ``deltas``, ``eta``, ``shorthand`` and
            ``state_vector`` members.
        nps: Store whose ``store(...)`` return value is recorded as ``'ksv'``.

    Yields:
        dict: One list per field of the ``net.deltas()`` result, plus
        ``batch_points``, ``eta_change_batches``, ``etas``,
        ``batches_this_segment``, ``ts``, ``shorthand``, ``ksv``,
        ``end_loss`` and ``batches_from_start``.
    """
    # Evaluated once up front as before; the value itself is not used below.
    net.losses([training_batch])[0]
    batches_done = 0
    segment_length = 1.0
    while batches_done < batch_limit:
        segment_length *= batch_delta_factor
        segment_start = batches_done
        eta_log = []     # [batch at which eta took a new value, that eta]
        trajectory = []  # [batch, net.deltas()] samples
        for _ in range(int(segment_length)):
            eta_now_differs = not eta_log or net.eta != eta_log[-1][1]
            if eta_now_differs:
                eta_log.append([batches_done, net.eta])
            net.learn([training_batch])
            if batches_done < 100 or batches_done % 100 == 0:
                trajectory.append([batches_done, net.deltas()])
            batches_done += 1
        # Make sure the state at the end of the segment is sampled too.
        if not trajectory or trajectory[-1][0] < (batches_done - 1):
            trajectory.append((batches_done, net.deltas()))

        # Transpose the samples into one list per field of the deltas record.
        samples = [entry[1] for entry in trajectory]
        properties = {}
        for field, series in zip(samples[0]._fields, zip(*samples)):
            properties[field] = list(series)
        properties['batch_points'] = [entry[0] for entry in trajectory]
        change_batches, eta_values = (list(column) for column in zip(*eta_log))
        properties['eta_change_batches'] = change_batches
        properties['etas'] = eta_values
        properties['batches_this_segment'] = batches_done - segment_start
        properties['ts'] = time.time()
        properties['shorthand'] = net.shorthand
        properties['ksv'] = nps.store(net.state_vector())
        properties['end_loss'] = net.losses([training_batch])[0]
        properties['batches_from_start'] = batches_done
        yield properties

In [None]:
# Capture the cell above, i.e. the last of the trainer definitions -- the one
# driven by batch_limit and batch_delta_factor (In[-2] is the previously
# executed cell).
code_strings.append(In[-2])

In [None]:
def get_starting_facts(net, nps):
    """Describe the untrained ``net`` for the first ``Net`` node of a run.

    Args:
        net: Network to describe; its ``shorthand``, ``state_vector`` and
            ``losses`` members are used.
        nps: Store whose ``store(...)`` return value is recorded as ``'ksv'``.

    Returns:
        dict: Keys ``shorthand``, ``ksv``, ``loss`` (on ``training_batch``)
        and ``head`` (always True).
    """
    facts = {}
    facts['shorthand'] = net.shorthand
    facts['ksv'] = nps.store(net.state_vector())
    facts['loss'] = net.losses([training_batch])[0]
    facts['head'] = True
    return facts

In [None]:
# Capture the get_starting_facts cell above (In[-2] is the previously executed
# cell).
code_strings.append(In[-2])

## Build the runner

In [None]:
def create_starting_entry(driver, net, nps, get_starting_facts, q_add_start):
    """Write the starting entry for ``net`` using a caller-supplied query.

    Args:
        driver: Database driver, passed through to ``nj.query_write``.
        net: Network handed to ``get_starting_facts``.
        nps: Store handed to ``get_starting_facts``.
        get_starting_facts: Callable ``(net, nps) -> dict``; the dict supplies
            the query parameters and must contain ``'ksv'``.
        q_add_start: Query text to run with those parameters.

    Returns:
        The ``'ksv'`` value from the starting facts.
    """
    starting_facts = get_starting_facts(net, nps)
    # This used to call ``tj.query_write``, but no ``tj`` exists in this
    # notebook; the neotools module is imported as ``nj``.
    nj.query_write(driver, q_add_start, **starting_facts)
    return starting_facts['ksv']

In [None]:
#code_strings.append(In[-2])

In [None]:
def extend_by_one(driver, net, ksv, nps, observations, add_subsequent):
    """Write one set of observations inside a session write transaction.

    Args:
        driver: Object whose ``session()`` yields a context-managed session.
        net: Passed to ``add_subsequent`` after ``observations``.
        ksv: Unused.
        nps: Unused.
        observations: Mapping with at least ``'end_loss'`` and ``'ksv'``.
        add_subsequent: Transaction function given to ``write_transaction``.

    Returns:
        ``observations['ksv']``.
    """
    with driver.session() as db_session:
        db_session.write_transaction(add_subsequent, observations, net)
        end_loss = observations['end_loss']
        print(f"loss {end_loss}")
        return observations['ksv']

In [None]:
#code_strings.append(In[-2])

In [None]:
def run_it(cx, driver, nps):
    """Run one training experiment from the namespace ``cx``, recording it.

    Args:
        cx: Mapping that provides ``net``, ``add_net_start_to_graph``,
            ``add_net_subsequent_to_graph``, ``get_starting_facts``,
            ``trainer`` and ``parameters_unikey``.
        driver: Database driver, handed to the two ``add_net_*`` callables.
        nps: Store handed to ``get_starting_facts`` and ``trainer``.

    The starting facts are printed and written first. Each record yielded by
    the trainer is then written with ``prior_unikey`` set to the key returned
    by the previous write, so the records are linked one after another.
    """
    net = cx['net']
    record_start = cx['add_net_start_to_graph']
    record_segment = cx['add_net_subsequent_to_graph']
    collect_starting_facts = cx['get_starting_facts']
    make_segments = cx['trainer']

    # The session object itself is not used; the writers get the driver.
    with driver.session():
        head_facts = collect_starting_facts(net, nps)
        head_facts['parameters_unikey'] = cx['parameters_unikey']
        print(head_facts)
        latest_unikey = record_start(driver, head_facts)
        for segment in make_segments(net, nps):
            segment['prior_unikey'] = latest_unikey
            latest_unikey = record_segment(driver, segment)

In [None]:
# Capture the run_it cell above (In[-2] is the previously executed cell).
code_strings.append(In[-2])

* At this point, all the code the procedure needs has been prepared and placed in `code_strings`. The same code is also defined in the current notebook context, so we can test it here too.
* Run-specific parameters are placed in the Parameters nodes created by the cells below.

# Create the Experiment -> Procedure -> Parameters
Create an experiment, add a procedure, add parameters.

# Experiment setup

## Switches on what to set up now

In [None]:
# True: create a new Experiment node. False: look the existing one up by name.
setup_experiment = False
# True: create a new Procedure (with code_strings). False: look the existing one up by name.
setup_procedure = False
# True: create the Parameters entries for every configured trial. False: create none.
setup_parameters = True

## Configuration

In [None]:
experiment_name = 't4'
procedure_name = 'Train Pauls ADCs'
# One family of trials per eps value. The families differ only in the eps
# assigned by the appended code; every entry gets its own list objects.
parameters = defaultdict(dict)
parameters.update({
    f'eps {eps}': {
        'prepend_code_strings': ["net_shorthand = '1x3sx3t'",
                                 'batch_limit = 10_000',
                                 'batch_delta_factor = 2**(1/5)'],
        'append_code_strings': [f'net.eps = {eps}'],
        'trials': 1024,
    }
    for eps in ('0.01', '0.03', '0.1', '0.3', '1.0')
})

## Do setups according to the switches above

In [None]:
if not setup_experiment:
    # Reuse the experiment that is already in the database.
    experiment_unikey = gu.get_experiment_key_from_name(driver, experiment_name)
else:
    # Create a new experiment under a fresh random key.
    experiment_unikey = secrets.token_urlsafe(16)
    gu.create_an_experiment(
        driver,
        experiment_name=experiment_name,
        experiment_unikey=experiment_unikey,
    )

In [None]:
if not setup_procedure:
    # Reuse the procedure of this name that already belongs to the experiment.
    procedure_unikey = dict(
        gu.get_procedure_names_keys_from_experiment_key(driver, experiment_unikey)
    )[procedure_name]
else:
    # Create a new procedure holding the captured code, under a fresh random key.
    procedure_unikey = secrets.token_urlsafe(16)
    gu.create_a_procedure(
        driver,
        experiment_unikey=experiment_unikey,
        procedure_name=procedure_name,
        procedure_unikey=procedure_unikey,
        code_strings=code_strings,
    )

In [None]:
if setup_parameters:
    for name, params in parameters.items():
        trials = params['trials']
        # An int n means trials 0..n-1; any other value is unpacked straight
        # into range(), e.g. (start, stop).
        if isinstance(trials, int):
            trials = (trials,)
        for i in range(*trials):
            parameters_unikey = secrets.token_urlsafe(16)
            # Uniform non-negative seed below 2**31. The earlier abs() of a
            # random int32 stayed negative for the minimum int32 value, and
            # np.random.seed rejects negative seeds.
            random_seed = secrets.randbelow(2**31)
            # Per-trial copies, so the shared configuration lists are never
            # mutated; the trial-specific assignments go last.
            prepend_code_strings = [
                *params['prepend_code_strings'],
                f"random_seed = {random_seed}",
                f"trial_number = {i}",
                f"parameters_unikey = '{parameters_unikey}'",
            ]
            append_code_strings = list(params['append_code_strings'])
            gu.create_parameters_to_experiment_procedure(driver,
                procedure_unikey=procedure_unikey,
                parameters_name=f"{name} {i}",
                parameters_unikey=parameters_unikey,
                prepend_code_strings=prepend_code_strings,
                append_code_strings=append_code_strings,
                trial=i)

---

# Here the experiment is set up in the graph database

We could:
* Run it locally here
* Run it from the code strings we have stored here
* Run it like the bot would, by getting the code from the database
* Launch the bot against it

In [None]:
# How to run the experiment from this notebook: 'local', 'code_strings',
# 'like bot' or 'bot' (not implemented). No cell below matches any other
# value, so 'no' runs nothing.
run = 'no'

In [None]:
# 'local': run with the definitions living in this notebook's own namespace.
if run == 'local':
    nps = nj.NumpyStore(driver)
    run_it(globals(), driver, nps)

In [None]:
# 'code_strings': execute the captured cell sources in a fresh namespace and
# call the run_it they define.
# NOTE(review): the net_shorthand cell is not captured and the Parameters
# prepend strings are not applied here, so the captured net-building code
# looks likely to hit a NameError in this mode -- confirm.
if run == 'code_strings':
    nps = nj.NumpyStore(driver)
    cx = {}
    for s in code_strings:
        exec(s, cx)
    cx['run_it'](cx, driver, nps)

## Find work and do it
Fetch the code of an unstarted Parameters node from the database and run it, as the bot would.

In [None]:
def get_code_strings_from_db(driver, experiment_name, procedure_name):
    """Fetch the code strings for the first unstarted Parameters of a procedure.

    The lookup goes experiment name -> experiment key -> procedure key ->
    first entry of the unstarted parameters -> code strings.

    Args:
        driver: Database driver, passed to every ``gu`` helper.
        experiment_name: Name of the experiment to look up.
        procedure_name: Name of a procedure belonging to that experiment.

    Returns:
        Whatever ``gu.get_code_strings_of_experiment_procedure_parameters``
        returns for the selected keys.

    Raises:
        KeyError: If the experiment has no procedure called ``procedure_name``.
        IndexError: If indexing the unstarted parameters at 0 fails --
            presumably when there are none left (assumes a list-like return;
            confirm against grexutils).
    """
    # The helpers live in grexutils, imported as ``gu``; they were previously
    # called here without the module prefix.
    experiment_unikey = gu.get_experiment_key_from_name(driver, name=experiment_name)
    procedures = dict(gu.get_procedure_names_keys_from_experiment_key(driver, key=experiment_unikey))
    procedure_unikey = procedures[procedure_name]
    unstarted_parameters = gu.get_unstarted_parameters_of_procedure(driver, procedure_unikey=procedure_unikey)
    parameters_unikey = unstarted_parameters[0]
    code_strings_from_db = gu.get_code_strings_of_experiment_procedure_parameters(driver,
        experiment_unikey=experiment_unikey,
        procedure_unikey=procedure_unikey,
        parameters_unikey=parameters_unikey)
    return code_strings_from_db

In [None]:
def now_run_it(driver, code_strings):
    """Execute ``code_strings`` in a fresh namespace, then call its ``run_it``.

    Args:
        driver: Database driver, used for the NumpyStore and handed to
            ``run_it``.
        code_strings: Iterable of Python source strings, executed in order
            in one shared namespace. They must define ``run_it``.
    """
    # NOTE(security): exec runs whatever source it is given with full
    # privileges; only feed it code strings from a trusted database.
    namespace = {}
    for source in code_strings:
        exec(source, namespace)
    store = nj.NumpyStore(driver)
    entry_point = namespace['run_it']
    entry_point(namespace, driver, store)

In [None]:
# 'like bot': fetch code strings from the database and run them.
# NOTE(review): 't2' / 'Train ADCs' are hard-coded and differ from the
# experiment_name / procedure_name configured above -- confirm which is meant.
# NOTE(review): this rebinds the notebook-level code_strings list that holds
# the captured cells.
if run == 'like bot':
    code_strings = get_code_strings_from_db(driver, 't2', 'Train ADCs')
    now_run_it(driver, code_strings)

In [None]:
# 'bot': not implemented; selecting it raises NotImplementedError.
if run == 'bot':
    raise NotImplementedError

In [None]:
# Manual walk through the same chain of lookups that get_code_strings_from_db
# performs, leaving each intermediate key in a notebook variable.
# NOTE(review): ``if True`` is presumably a hand toggle for this cell -- confirm.
if True:
    experiment_unikey = gu.get_experiment_key_from_name(driver, name=experiment_name)
    print(experiment_unikey)
    procedures = dict(gu.get_procedure_names_keys_from_experiment_key(driver, key=experiment_unikey))
    procedure_unikey = procedures[procedure_name]
    unstarted_parameters = gu.get_unstarted_parameters_of_procedure(driver, procedure_unikey=procedure_unikey)
    parameters_unikey = unstarted_parameters[0]


In [None]:
# Fetch the code strings for the configured experiment and procedure, using
# the grexutils version of get_code_strings_from_db.
code_strings_from_db = gu.get_code_strings_from_db(driver, experiment_name, procedure_name)

In [None]:
# Print the fetched code strings, separated by a /****...****/ rule line.
print(('\n\n/' + '*'*80 + '/\n').join(code_strings_from_db))

___

# Stop

In [None]:
# Always fails -- presumably so that "Run All" halts at this point.
assert False, "stop here"

In [None]:
# Presumably a leftover scratch cell; its value is not used anywhere.
abs(-3)