# $U(1)$ Gauge Model using L2HMC in graph mode

--------------------------------------------------------------------
### TODO:
* [ ] Look at performance on Cooley (longer training runs).
* [ ] Fit observables to Eq. \ref{eq:therm_time} to determine the thermalization time $\tau$.

\begin{equation} 
f(t) \equiv A \exp\left(-t / \tau\right) + B
\label{eq:therm_time}
\end{equation}
* [ ] Define a distance metric as the difference in topological charge between two samples, and see what effect adding it as an additional term in the loss function has on the model's ability to tunnel between topological sectors.

--------------------------------------------------------------------

In [None]:
import os
import sys
import time
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy.special import i0, i1

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import utils.gauge_model_helpers as helpers
from utils.data_utils import (
    calc_avg_vals_errors, block_resampling, jackknife_err
)
from gauge_model import GaugeModel

tfe = tf.contrib.eager
%autoreload 2
%matplotlib notebook

## Define parameters

In [None]:
# Clear the default TensorFlow graph before (re)defining the run parameters.
tf.reset_default_graph()
# Settings dictionary handed to `GaugeModel(params=params, ...)` further down.
# Keys are grouped by the section banners below.
params = {
    # --------------------- Lattice parameters ----------------------------
    'time_size': 8,
    'space_size': 8,
    'link_type': 'U1',
    'dim': 2,
    'num_samples': 56,
    'rand': False,
    'data_format': 'channels_last',
    # --------------------- Leapfrog parameters ---------------------------
    'num_steps': 4,
    'eps': 0.25,
    'loss_scale': 1.,
    # --------------------- Learning rate parameters ----------------------
    'lr_init': 1e-3,
    'lr_decay_steps': 1000,
    'lr_decay_rate': 0.96,
    # --------------------- Annealing rate parameters ---------------------
    'annealing': True,
    'beta_init': 2.,
    'beta_final': 4.,
    # --------------------- Training parameters ---------------------------
    'train_steps': 10000,
    'save_steps': 1000,
    'logging_steps': 50,
    'print_steps': 1,
    'training_samples_steps': 1000,
    'training_samples_length': 500,
    # --------------------- Model parameters ------------------------------
    'network_arch': 'generic',
    'hmc': False,
    'eps_trainable': True,
    'metric': 'cos_diff',
    'std_weight': 1.,
    'aux_weight': 1.,
    'charge_weight': 1.,
    'summaries': True,
    'clip_grads': False,
    'clip_value': None,
}

## Create model

In [None]:
# Export the KMP_* / OMP_* threading environment variables in one call.
os.environ.update({
    "KMP_BLOCKTIME": "0",
    "KMP_SETTINGS": "1",
    "KMP_AFFINITY": "granularity=fine,verbose,compact,1,0",
    "OMP_NUM_THREADS": "2",
})

# Default session configuration, later passed to GaugeModel(config=...).
config = tf.ConfigProto()
tf.reset_default_graph()

In [None]:
# 15.7 s to create for 8x8, num_samples=100, num_steps=1
# 15.2 s to create for 8x8, num_samples=10, num_steps=3
log_dir = os.path.join('/', 'tmp')
model = GaugeModel(params=params, 
                   config=config,
                   sess=None,
                   log_dir=log_dir,
                   restore=False)

#save_params_to_pkl_file(params, model.info_dir)

In [None]:
model.train(5)

In [None]:
from tensorflow.keras.utils import plot_model

In [None]:
import pydot_ng as pydot

In [None]:
def _check_pydot():
  """Check that pydot and graphviz can render a graph.

  Renders an empty `pydot.Dot` graph as a smoke test of the installation.

  Raises:
      ImportError: if rendering the blank graph fails for any reason. The
          underlying exception is attached as the cause so the real failure
          stays visible in the traceback.
  """
  try:
    # Attempt to create an image of a blank graph
    # to check the pydot/graphviz installation.
    pydot.Dot.create(pydot.Dot())
  except Exception as err:
    # pydot raises a generic Exception here,
    # so no specific class can be caught.
    raise ImportError('Failed to import pydot. You must install pydot'
                      ' and graphviz for `pydotprint` to work.') from err

In [None]:
def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'):
  """Convert a Keras model to dot format.

  Arguments:
      model: A Keras model instance.
      show_shapes: whether to display shape information.
      show_layer_names: whether to display layer names.
      rankdir: `rankdir` argument passed to PyDot,
          a string specifying the format of the plot:
          'TB' creates a vertical plot;
          'LR' creates a horizontal plot.

  Returns:
      A `pydot.Dot` instance representing the Keras model.

  Raises:
      ImportError: propagated from `_check_pydot` when pydot/graphviz
          cannot render a graph.
  """
  from tensorflow.python.keras.layers.wrappers import Wrapper
  from tensorflow.python.keras.models import Sequential

  _check_pydot()
  dot = pydot.Dot()
  dot.set('rankdir', rankdir)
  dot.set('concentrate', True)
  dot.set_node_defaults(shape='record')

  # Build an unbuilt Sequential model before reading its layers.
  if isinstance(model, Sequential) and not model.built:
    model.build()
  layers = model.layers

  # Create graph nodes.
  for layer in layers:
    layer_id = str(id(layer))

    # Append a wrapped layer's label to node's label, if it exists.
    layer_name = layer.name
    class_name = layer.__class__.__name__
    if isinstance(layer, Wrapper):
      layer_name = '{}({})'.format(layer_name, layer.layer.name)
      child_class_name = layer.layer.__class__.__name__
      class_name = '{}({})'.format(class_name, child_class_name)

    # Create node's label.
    if show_layer_names:
      label = '{}: {}'.format(layer_name, class_name)
    else:
      label = class_name

    # Rebuild the label as a table including input/output shapes.
    if show_shapes:
      try:
        outputlabels = str(layer.output_shape)
      except AttributeError:
        outputlabels = 'multiple'
      if hasattr(layer, 'input_shape'):
        inputlabels = str(layer.input_shape)
      elif hasattr(layer, 'input_shapes'):
        inputlabels = ', '.join([str(ishape) for ishape in layer.input_shapes])
      else:
        inputlabels = 'multiple'
      label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, inputlabels,
                                                     outputlabels)
    dot.add_node(pydot.Node(layer_id, label=label))

  # Connect nodes with edges.
  for layer in layers:
    # The target id is the same for every inbound edge of this layer, so it
    # is computed once per layer rather than once per edge.
    layer_id = str(id(layer))
    for i, inbound_node in enumerate(layer._inbound_nodes):  # pylint: disable=protected-access
      node_key = layer.name + '_ib-' + str(i)
      # Skip inbound nodes that are not registered on this model.
      if node_key not in model._network_nodes:  # pylint: disable=protected-access
        continue
      for inbound_layer in inbound_node.inbound_layers:
        dot.add_edge(pydot.Edge(str(id(inbound_layer)), layer_id))
  return dot


def plot_model(model,
               to_file='model.png',
               show_shapes=False,
               show_layer_names=True,
               rankdir='TB'):
  """Render a Keras model as a graph image and write it to disk.

  Arguments:
      model: A Keras model instance
      to_file: Destination path; its suffix selects the output format.
      show_shapes: whether to display shape information.
      show_layer_names: whether to display layer names.
      rankdir: `rankdir` argument passed to PyDot,
          a string specifying the format of the plot:
          'TB' creates a vertical plot;
          'LR' creates a horizontal plot.
  """
  graph = model_to_dot(model, show_shapes, show_layer_names, rankdir)
  suffix = os.path.splitext(to_file)[1]
  # Drop the leading dot of the suffix; without a suffix fall back to PNG.
  image_format = suffix[1:] if suffix else 'png'
  graph.write(to_file, format=image_format)


In [None]:
plot_model(model.dynamics, to_file='dynamics_model.png',
           show_shapes=True, show_layer_names=True, rankdir='LR')
#plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
plot_model(model.dynamics.position_fn, to_file='dynamics_position_fn.png',
           show_shapes=True, show_layer_names=True, rankdir='LR')

In [None]:
plot_model(model.dynamics.momentum_fn, to_file='dynamics_momentum_fn.png',
           show_shapes=True, show_layer_names=True)

In [None]:
model.train_data_dict

In [None]:
np.array(list(model.train_data_dict['charges'].values()), dtype=np.int32)

In [None]:
model._current_state['samples']

In [None]:
def top_charge(x, fft=False):
    """Compute one topological-charge value per entry along axis 0 of `x`.

    Args:
        x: array indexed as `x[:, :, :, 0]` and `x[:, :, :, 1]`.
        fft: when True the plaquette sums are projected with
            `project_angle_approx`, otherwise with `project_angle`.
            NOTE(review): neither helper is defined in the visible part of
            this notebook; confirm where they come from.

    Returns:
        `floor(0.1 + S / (2 * pi))`, where `S` is the projected plaquette
        sum reduced over axes 1 and 2.
    """
    x0 = x[:, :, :, 0]
    x1 = x[:, :, :, 1]
    plaq_sums = (x0
                 - x1
                 - np.roll(x0, shift=-1, axis=2)
                 + np.roll(x1, shift=-1, axis=1))

    # Only the selected projector is looked up.
    projector = project_angle_approx if fft else project_angle
    ps_proj = projector(plaq_sums)

    return np.floor(0.1 + (np.sum(ps_proj, axis=(1, 2)) / (2 * np.pi)))

In [None]:
q0 = top_charge(np.random.randn(*samples.shape))

In [None]:
q0

In [None]:
samples = model._current_state['samples'].reshape(-1, 8, 8, 2)

In [None]:
q = np.array(top_charge(samples, fft=False), dtype=np.int32)

In [None]:
q

In [None]:
q = np.array(top_charge(np.mod(samples, 2*np.pi), fft=False), dtype=np.int32)

In [None]:
q

In [None]:
%debug

In [None]:
model.lattice.num_plaquettes

In [None]:
model.lattice.calc_plaq_observables()

In [None]:
model.dynamics.summary()

In [None]:
model.dynamics.built

In [None]:
model.dynamics.layers

In [None]:
model.dynamics.position_fn.layers

In [None]:
model.dynamics.position_fn.summary()

In [None]:
samples_np = np.reshape(
    np.array(model.lattice.samples, dtype=np.float32),
    (model.num_samples, model.x_dim)
)

beta_np = model.beta_init

In [None]:
fd = {
    model.x: samples_np,
    model.beta: beta_np,
}

# One session call evaluates every op; the order of this list fixes the
# positions unpacked below.
outputs = model.sess.run(
    [
        model.train_op,         # applies the gradients
        model.loss_op,          # loss value
        model.x_out,            # updated samples
        model.px,               # acceptance probability
        model.dynamics.eps,     # current step size
        model.actions_op,       # average actions
        model.plaqs_op,         # average plaquettes
        model.charges_op,       # topological charges
        model.lr,               # current learning rate
        model.charge_diff_op,   # change in top. charge / num_samples
    ],
    feed_dict=fd,
)

# outputs[0] belongs to train_op and is not kept.
(loss_np, samples_np, px_np, eps_np, actions_np,
 plaqs_np, charges_np, lr_np, charge_diff) = outputs[1:]

In [None]:
loss_np

In [None]:
charges_np

In [None]:
import utils.file_io as io
io.save_params_to_pkl_file(params, model.info_dir)

In [None]:
# ~ 0.3xx s / step for 8x8, num_samples=100, num_steps=1
# ~ 1.3x s / step for 8x8, num_samples=100, num_steps=5
# ~ 0.97x s / step for 8x8, num_samples=128, num_steps=3
# ~ 0.75x s / step for 8x8, num_samples=20, num_steps=5
# ~ 1.5x s / step for 8x8, num_samples=20, num_steps=10
model.train(model.train_steps)

In [None]:
%debug

In [None]:
%debug

In [None]:
betas = [3., 4.]
for beta in betas:
    model.run(50000, beta=beta)

In [None]:
tun_events_keys = np.array(list(model.tunn_events_dict.keys()))
tun_events_vals = np.array(list(model.tunn_events_dict.values()))

In [None]:
len(tun_events_vals)

In [None]:
fig, ax = plt.subplots()
ax.plot(tun_events_vals, marker='.', fillstyle='none', ls='')
ax.set_xlabel('Training step', fontsize=14)
ax.set_ylabel('Number of events', fontsize=14)
title_str = (f'Number of tunneling events vs. '
             f'training step for {model.num_samples} samples')
ax.set_title(title_str, fontsize=16)
out_file = os.path.join(model.figs_dir, 'tunneling_events_vs_training_step.png')
print(f"Saving figure to: {out_file}.")
plt.savefig(out_file, dpi=400, bbox_inches='tight')

In [None]:
tun_events_keys[:10, 1]

In [None]:
steps = tun_events_keys[:, 0]
betas = tun_events_keys[:, 1]

In [None]:
# Two stacked panels that share one x-axis. Sharing is requested when the
# lower axes is created: `Axes.get_shared_x_axes().join(...)` was deprecated
# in Matplotlib 3.6 and later removed, while `sharex=` works on old and new
# releases alike.
fig = plt.figure()
ax1 = plt.subplot(211)
ax2 = plt.subplot(212, sharex=ax1)

ax1.plot(steps, tun_events_vals / model.num_samples,
         marker='.', fillstyle='none', ls='')
ax1.set_xlabel('Training step', fontsize=14)
ax2.set_xlabel('Beta', fontsize=16)
ax1.set_ylabel('# of tunneling events', fontsize=14)
#ax1.set_title(title_str, fontsize=16)

# Hide tick labels on the upper panel only. `set_xticklabels([])` would also
# blank the lower panel, because axes shared at creation share their tickers.
ax1.tick_params(labelbottom=False)
# NOTE(review): nothing is drawn on ax2 and `betas` is unused in this cell;
# confirm what the lower panel is meant to show.
ax2.autoscale() ## call autoscale if needed

In [None]:
obs, stats = model.run(1000, beta=3.)

In [None]:
obs, stats = model.run(100, beta=3.)

In [None]:
model.tunn_ev

In [None]:
# Run the sampler for every (number of steps, beta) combination and keep the
# results of each run. Previously the two accumulator lists were never
# appended to, and `stats` was overwritten by the return value of each run,
# so only the final run survived.
observables = []
stats = []
betas = [3., 4.]
# Step counts are integers (they were written as floats such as 5e3).
steps = [5000, 10000, 50000]
for step in steps:
    for beta in betas:
        obs, run_stats = model.run(step, beta=beta)
        observables.append(obs)
        stats.append(run_stats)
        

In [None]:
# Run identifiers; every path below is derived from these two strings.
dir_strings = ('steps_50000_beta_4.0', 'steps_50000_beta_3.0')

# One observables directory per run, under the model's evaluation directory.
obs_dirs = tuple(
    os.path.join(model.eval_dir, 'observables', run_name)
    for run_name in dir_strings
)

# Sample histories live under a relative 'samples' directory.
samples_history_files = tuple(
    os.path.join('samples', 'samples_history_' + run_name + '.npy')
    for run_name in dir_strings
)

# Pickled observables, one file per run inside the matching obs_dirs entry.
actions_files = tuple(
    os.path.join(obs_dir, 'actions_' + run_name + '.pkl')
    for obs_dir, run_name in zip(obs_dirs, dir_strings)
)
plaqs_files = tuple(
    os.path.join(obs_dir, 'plaqs_' + run_name + '.pkl')
    for obs_dir, run_name in zip(obs_dirs, dir_strings)
)
charges_files = tuple(
    os.path.join(obs_dir, 'charges_' + run_name + '.pkl')
    for obs_dir, run_name in zip(obs_dirs, dir_strings)
)
tunn_events_files = tuple(
    os.path.join(obs_dir, 'tunn_events_' + run_name + '.pkl')
    for obs_dir, run_name in zip(obs_dirs, dir_strings)
)

In [None]:
import pickle


def _load_pickle(path):
    """Return the object unpickled from the file at `path`."""
    with open(path, 'rb') as ff:
        return pickle.load(ff)


# The sample histories are the '.npy' paths collected in
# `samples_history_files`; this cell previously iterated over
# `samples_files`, a name that is never defined in the notebook.
samples = [np.load(f) for f in samples_history_files]

# Each remaining observable is stored as one pickle file per run.
actions = [_load_pickle(f) for f in actions_files]
plaqs = [_load_pickle(f) for f in plaqs_files]
charges = [_load_pickle(f) for f in charges_files]
tunn_events = [_load_pickle(f) for f in tunn_events_files]

In [None]:
samples = []
for f in samples_history_files:
    samples.append(np.load(f))

In [None]:
 
tunn_events = []
for f in tunn_events_files:
    with open(f, 'rb') as ff:
        tunn_events.append(pickle.load(ff))

In [None]:
# NOTE(review): `samples_files` is not defined anywhere in the visible
# notebook. The path tuple built earlier is `samples_history_files`, whose
# entries end in '.npy', which `pickle.load` would presumably not read.
# Confirm which files this cell is meant to load.
samples = []
for f in samples_files:
    with open(f, 'rb') as ff:
        samples.append(pickle.load(ff))
# Unpickle every file listed in `tunn_events_files`, one list entry per file.
tunn_events = []
for f in tunn_events_files:
    with open(f, 'rb') as ff:
        tunn_events.append(pickle.load(ff))

In [None]:
model.run(100, beta=3.)

In [None]:
model.run(500, beta=3.)

In [None]:
model.run(1000, beta=4.)

In [None]:
model.run(50000, beta=4.)

In [None]:
model.run(50000, beta=4.)

In [None]:
%debug

In [None]:
model.run(50000, beta=model.beta_final)

In [None]:
import pickle
with open(model.files['samples_pkl_file'], 'rb') as f:
    samples_init = pickle.load(f)
beta_init = model.update_beta(model.data['step'])
beta_init
model.data['learning_rate']
model.data['step']

In [None]:
model.run(50000, beta=3.)

In [None]:
model.run(50000, beta=4.)

In [None]:
model.run(50000, beta=3.5)

In [None]:
model.run(50000, beta=model.beta_final)

In [None]:
#model.train(model.train_steps, samples_init=samples_init, beta_init=beta_init, 
#            pre_train=False, trace=False)

In [None]:
model.run(20000, beta=model.beta_final)

In [None]:
model.run(50000, beta=model.beta_final)

In [None]:
# 37m 39s for 8x8 with num_steps=1 using slow _total_action method
model.run(50000, beta=3.)

In [None]:
model.run(20000, beta=3.)

In [None]:
#betas = [2., 4.]
betas = [model.beta_final]

In [None]:
run_steps = [500, 1000, 5000, 10000]#, 20000]#, 50000]#, 6000#, 10000]

In [None]:
# for 8x8 lattice, run 20000 eval steps in ~14m 20s
# for 8x8 lattice, run 30000 eval steps in ~21m 31s
# for 8x8 lattice, run 50000 eval steps in ~40m 12s
for beta in betas:
    for steps in run_steps:
        model.run(steps, beta=beta)

In [None]:
model.sess.run(model.dynamics.eps)

In [None]:
model.sess.run(tf.log(model.dynamics.eps))

In [None]:
model.run(20000, beta=model.beta_final)

In [None]:
model.run(30000, beta=model.beta_final)

In [None]:
model.dynamics.momentum_fn.summary()

In [None]:
model.run(20000)

In [None]:
model.run(30000)

In [None]:
model.run(50000)

In [None]:
for step in run_steps:
    model.run(step, beta=2.)

In [None]:
model.run(10000, beta=2.)

In [None]:
model.run(40000, beta=2.)

In [None]:
model.run(1000, beta=2.)

In [None]:
for steps in run_steps:
    _ = model.run(steps)

In [None]:
_ = model.run(10000)

In [None]:
_ = model.run(20000)

In [None]:
_ = model.run(8000)

In [None]:
step = model.sess.run(model.global_step)
model._save_model(samples=None, step=step)

#helpers.write_run_data(model.files['run_info_file'], model.data)

In [None]:
samples_np = np.array(model.lattice.samples, dtype=np.float32)
fd = {model.x: samples_np, model.beta: 8.}
# `conv_x1` is a layer object (its `.weights` are read in a later cell), and
# `Session.run` can only fetch tensors/ops, so evaluate the layer's symbolic
# output instead of the layer itself.
# NOTE(review): the training-step cell reshapes the samples to
# (model.num_samples, model.x_dim) before feeding `model.x`; confirm whether
# the same reshape is needed here.
model.sess.run(model.dynamics.position_fn.conv_x1.output, feed_dict=fd)

In [None]:
samples_np = np.array(model.lattice.samples, dtype=np.float32)

In [None]:
conv_x1_kernel, conv_x1_bias = model.dynamics.position_fn.conv_x1.weights
conv_x1_kernel_np = model.sess.run(conv_x1_kernel)
conv_x1_kernel_np.shape

In [None]:
conv_x2_kernel, conv_x2_bias = model.dynamics.position_fn.conv_x2.weights
conv_x2_kernel_np = model.sess.run(conv_x2_kernel)
conv_x2_kernel_np.shape

In [None]:
#conv_x1_out = model.dynamics.position_fn.conv_x1(model.x)
# Fetch the layer's output tensor: `Session.run` cannot evaluate a layer
# object, only the tensors/ops it produced.
conv_x1_out_np = model.sess.run(
    model.dynamics.position_fn.conv_x1.output,
    feed_dict={model.x: samples_np, model.beta: 8.},
)
conv_x1_out_np.shape

In [None]:
conv_x1_out = model.sess.run(model.dynamics.position_fn.conv_x1.output, feed_dict={model.x: samples_np, model.beta: 8.})

In [None]:
# Axis 2 of the kernel is iterated as `channel` and axis 3 is indexed by the
# panel number, so the filter count must be read from axis 3 (it was read
# from axis 2 before).
num_filters = conv_x2_kernel_np.shape[3]
channels = range(conv_x2_kernel_np.shape[2])
# Shared colour limits so every panel is drawn on the same scale.
w_max = np.max(conv_x2_kernel_np)
w_min = np.min(conv_x2_kernel_np)

for channel in channels:
    # One figure per channel, holding up to 16 filters on a 4x4 grid.
    fig, axes = plt.subplots(4, 4)
    for idx, ax in enumerate(axes.flat):
        if idx >= num_filters:
            # Fewer filters than grid cells: blank the unused panel instead
            # of indexing past the end of the kernel.
            ax.axis('off')
            continue
        img = conv_x2_kernel_np[:, :, channel, idx]
        _ = ax.imshow(img, vmin=w_min, vmax=w_max,
                      interpolation='nearest', cmap='seismic')
        _ = ax.set_xticks([])
        _ = ax.set_yticks([])
        _ = ax.set_title(f'{channel}, {idx}')
    fig.tight_layout()

In [None]:

# Draw every filter of every channel onto the existing `axes` grid and save
# one image per channel.
# NOTE(review): `weights`, `plot_dir` and `name` are not defined anywhere in
# the visible notebook, and `axes` is whatever grid the previous cell created
# last; confirm these before running.
# iterate channels
for channel in channels:
    # iterate filters inside every channel
    for l, ax in enumerate(axes.flat):
        # get a single filter
        img = weights[:, :, channel, l]
        # put it on the grid
        ax.imshow(img, vmin=w_min, vmax=w_max, interpolation='nearest', cmap='seismic')
        # remove any labels from the axes
        ax.set_xticks([])
        ax.set_yticks([])
    # save figure
    plt.savefig(os.path.join(plot_dir, '{}-{}.png'.format(name, channel)), bbox_inches='tight')

In [None]:
# Slice index 0 along the third axis of the conv_x1 kernel; its own extrema
# set the colour limits.
w0 = conv_x1_kernel_np[:, :, 0]
w_min = w0.min()
w_max = w0.max()

fig, ax = plt.subplots()
_ = ax.imshow(
    w0,
    vmin=w_min,
    vmax=w_max,
    interpolation='nearest',
    cmap='seismic',
)
# Strip the tick marks from both axes.
_ = ax.set_xticks([])
_ = ax.set_yticks([])

In [None]:
model.dynamics.position_fn.dense

In [None]:
import pickle

with open(model.files['parameters_pkl_file'], 'wb') as f:
    pickle.dump(model.params, f)

In [None]:
model.sess.graph.collections

In [None]:
model.sess.graph.get_collection

In [None]:
model.dynamics.position_fn.summary()

In [None]:
# Walk through the stored sample history and evaluate the plaquette
# observables of each mini-batch with `lattice.calc_plaq_observables`.
# The three results are accumulated in parallel per-step lists.
actions_history, avg_plaquettes_history, top_charges_history = [], [], []
for idx, samples in enumerate(samples_history):
    t0 = time.time()
    observables = np.array(model.lattice.calc_plaq_observables(samples))
    actions, plaqs, charges = observables

    for history, values in ((actions_history, actions),
                            (avg_plaquettes_history, plaqs),
                            (top_charges_history, charges)):
        history.append(values)

    # Progress line: timing plus batch means of each observable.
    print(f'step: {idx}  '
          f'time / step: {time.time() - t0:^6.4g}  '
          f'avg action: {np.mean(actions):^6.4g}  '
          f'avg plaquette: {np.mean(plaqs):^6.4g} '
          f'top charge: {np.mean(charges):^6.4g}')

In [None]:
_ = helpers.plot_run_data(model.data, 
                          model.params, 
                          model.steps_arr, 
                          model.figs_dir, 
                          skip_steps=1)

In [None]:
#tf.reset_default_graph()
#model = GaugeModel(params=params,
#                   config=None,
#                   sess=None,
#                   conv_net=False,
#                   hmc=False,
#                   log_dir='../../gauge_logs_graph/run_25',
#                   restore=True)

In [None]:
samples = np.random.randn(*model.samples.shape)
samples_history = []

In [None]:
for i in range(500):
    t0 = time.time()
    samples = model.sess.run(model.x_out, feed_dict={model.x: samples})
    samples_history.append(samples)
    print(f'step: {i:^6.4g} time/step: {time.time() - t0:^6.4g}')

In [None]:
# Stack the per-step samples collected in `samples_history` into one array.
# The previous right-hand side read `samples_history_conv` itself, which is
# never assigned before this line in the notebook.
samples_history_conv = np.array(samples_history)
print(samples_history_conv.shape)

In [None]:
import pickle
samples_history_file = os.path.join(model.info_dir, 'samples_history.pkl')
with open(samples_history_file, 'wb') as f:
    pickle.dump(samples_history_conv, f)