In [3]:
%matplotlib inline
%load_ext tensorboard
!pip install -q tensorflow==2.0.0-beta1
from __future__ import absolute_import, division, print_function, unicode_literals
import sys, os, datetime, time, scipy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy import stats
from scipy.integrate import quad

[K     |████████████████████████████████| 87.9MB 1.3MB/s 
[K     |████████████████████████████████| 501kB 44.5MB/s 
[K     |████████████████████████████████| 3.1MB 42.7MB/s 
[?25h

### Benchmark Errors (MSE only)

In [0]:
def plot_history(history):
  """Plot the training and validation loss curves of a Keras fit run.

  Args:
    history: object exposing `.history` (dict of per-epoch metric lists that
      contains 'loss' and 'val_loss') and `.epoch` (the epoch indices); this
      notebook passes the value returned by `model.fit`.
  """
  frame = pd.DataFrame(history.history)
  frame['epoch'] = history.epoch
  epochs = frame['epoch']
  train_loss = frame['loss']

  plt.figure()
  plt.xlabel('Epoch')
  plt.ylabel('Loss')
  plt.plot(epochs, train_loss, label='Train Error')
  plt.plot(epochs, frame['val_loss'], label = 'Val Error')
  # The y-axis runs from 0 to the largest *training* loss; validation values
  # above that are clipped from view.
  plt.ylim([0, np.max(train_loss)])
  plt.legend()
  plt.show()

def build_model(if_print=False):
  """Create and compile the small regression network used as the benchmark.

  The network takes one scalar input, passes it through a 5-unit ReLU layer
  and a single linear output unit, and is compiled with MSE loss and SGD
  (learning rate 0.001). The optimizer object is printed as a side effect.

  Args:
    if_print: when truthy, also print the Keras model summary.

  Returns:
    The compiled `tf.keras.Sequential` model.
  """
  hidden_layer = tf.keras.layers.Dense(5, activation='relu', input_shape=[1])
  output_layer = tf.keras.layers.Dense(1)
  model = tf.keras.Sequential([hidden_layer, output_layer])

  sgd = tf.keras.optimizers.SGD(0.001)
  print(sgd)
  model.compile(loss='mse', optimizer=sgd)

  if if_print:
    model.summary()
  return model

def get_data(data_size=10000, seed=0):
  """Sample inputs uniformly from [-1, 1) and evaluate sin(x) / (1 + x^2).

  Seeds NumPy's global random generator with `seed` before sampling, so the
  same (data_size, seed) pair always yields the same sample. The first two
  inputs are printed as a quick sanity check.

  Args:
    data_size: number of points to draw.
    seed: value passed to `np.random.seed`.

  Returns:
    (x, y): `x` is a 1-D ndarray of length `data_size`; `y` is a Python list
    holding the target value for each element of `x`.
  """
  def target(value):
    return np.sin(value)/(1+value**2)

  np.random.seed(seed)
  x = np.random.uniform(-1, 1, data_size)
  # Evaluated element by element so `y` stays a plain list of scalars.
  y = list(map(target, x))
  print("x[:2]:", x[:2])
  return x,y

def get_benchmark_NN(x, y, epochs=20000, batch_size=500, validation_split=0.2):
  """Build the benchmark network, fit it with MSE loss, and plot the history.

  Args:
    x: training inputs (array-like of scalars).
    y: training targets (array-like of scalars, same length as `x`).
    epochs: number of training epochs (default keeps the original 20000).
    batch_size: mini-batch size passed to `fit` (default 500).
    validation_split: fraction of the data held out for validation (default 0.2).

  Returns:
    The fitted Keras model.
  """
  model = build_model(1)
  # `get_data` returns `y` as a Python list; convert both inputs to ndarrays so
  # `fit` does not depend on how Keras interprets plain lists of scalars.
  history = model.fit(
      np.asarray(x), np.asarray(y),
      batch_size=batch_size, epochs=epochs,
      validation_split=validation_split, verbose=0)
  plot_history(history)

  return model

def print_model(model):
  """Print the model summary followed by the parameters of its first two layers.

  For each of `model.layers[0]` and `model.layers[1]` the first trainable
  variable is printed as "weights" and the second as "bias", with the entries
  along the variable's first axis joined by ", ".

  Args:
    model: object exposing `summary()` and `layers[i].trainable_variables[j].numpy()`.
  """
  def joined(variable):
    return ', '.join(str(entry) for entry in variable.numpy())

  # (label, layer index, variable index) in the order they are printed.
  rows = (
      ("layer 1 weights:", 0, 0),
      ("layer 1 bias:", 0, 1),
      ("layer 2 weights:", 1, 0),
      ("layer 2 bias:", 1, 1),
  )

  model.summary()
  for label, layer_idx, var_idx in rows:
    print(label, joined(model.layers[layer_idx].trainable_variables[var_idx]))
    
def print_first_layer(model):
  """Print the first trainable variable of `model.layers[0]`, entries joined by ", "."""
  first_variable = model.layers[0].trainable_variables[0]
  rendered = ', '.join(str(entry) for entry in first_variable.numpy())
  print("First layer:", rendered)
    
def get_errors(model, x, y, if_print=False):
  """Compute residuals `y - model.predict(x)` and their mean / standard deviation.

  Args:
    model: object with a `predict(x)` method returning an ndarray.
    x: inputs forwarded to `model.predict`.
    y: targets; must broadcast against the flattened predictions.
    if_print: when truthy, print the mean and standard deviation.

  Returns:
    (errors, mean, sigma): the 1-D residual array, its mean, and the square
    root of its (population) variance.
  """
  # Flatten the predictions to 1-D so the subtraction is element-wise against `y`.
  predictions = model.predict(x).reshape(-1)
  errors = y - predictions
  mean, sigma = np.mean(errors), np.sqrt(np.var(errors))

  if if_print:
    print("\nmean of errors:", mean)
    print("standard deviation of errors:", sigma)

  return errors, mean, sigma

def get_kde(errors):
  """Fit a Gaussian KDE to `errors` using one tenth of the default bandwidth factor.

  Args:
    errors: 1-D sample passed to `scipy.stats.gaussian_kde`.

  Returns:
    The `gaussian_kde` object after its bandwidth factor has been divided by 10.
  """
  density = stats.gaussian_kde(errors)
  narrowed_factor = density.factor/10
  density.set_bandwidth(bw_method=narrowed_factor)
  return density
def plot_errors(errors):
  """Draw three stacked panels describing the distribution of `errors`.

  Panel 1: histogram (100 bins) normalised so the bar heights sum to 1.
  Panel 2: KDE of the errors from `get_kde`, plus a rug of the samples at y = -0.1.
  Panel 3: the KDE against a zero-mean normal pdf with std = std(errors) / 4;
           regions where the normal is above the KDE are blue, the rest red.

  Args:
    errors: 1-D ndarray of residuals.
  """
  errors_sigma = np.sqrt(np.var(errors))

  plt.figure(figsize=(20, 8))
  plt.xticks(fontsize=10)
  plt.subplots_adjust(hspace=1)

  # --- histogram -----------------------------------------------------------
  hist_ax = plt.subplot(311)
  hist_ax.title.set_text("Errors Histogram")
  # Equal weights of 1/N turn the counts into fractions of the sample.
  fractions = np.ones_like(errors)/float(len(errors))
  hist_ax.hist(errors, weights=fractions, bins=100, color='red')

  # --- kernel density estimate --------------------------------------------
  pdf_ax = plt.subplot(312)
  pdf_ax.title.set_text('Errors PDF')
  grid = np.linspace(np.min(errors), np.max(errors), num=10000)
  density = get_kde(errors)(grid)
  pdf_ax.plot(grid, density, color='black', linewidth=1)
  pdf_ax.plot(errors, np.full_like(errors, -0.1), '|k', markeredgewidth=1)

  # --- comparison with the target normal ----------------------------------
  cmp_ax = plt.subplot(313)
  cmp_ax.title.set_text("Normal Distribution")
  target = stats.norm.pdf(grid, 0, errors_sigma/4)
  cmp_ax.plot(grid, density, grid, target, color='black', linewidth=1)
  cmp_ax.fill_between(grid, density, target, where=target >= density, facecolor='blue', interpolate=True)
  cmp_ax.fill_between(grid, density, target, where=target <= density, facecolor='red', interpolate=True)

### Benchmark Loss (Target Normal) and Gradient

In [0]:
def get_loss(errors, if_plot=False):
  """Distribution loss: integrated squared gap between a target normal and the error KDE.

  The target is a zero-mean normal pdf with standard deviation std(errors) / 4.
  The loss is the integral over [-1, 1] of (normal_pdf(x) - kde(x))^2, computed
  with `scipy.integrate.quad`.

  Args:
    errors: 1-D ndarray of residuals.
    if_plot: when truthy, also draw (1) the absolute difference between the two
      densities and (2) both densities with the gap shaded, over the range of
      `errors`.

  Returns:
    The value of the integral (float).
  """
  errors_sigma = np.sqrt(np.var(errors))
  target_std = errors_sigma/4
  # Build the KDE once. The original rebuilt it inside the integrand, i.e. on
  # every single function evaluation made by `quad`.
  kde = get_kde(errors)

  def normal_minus_kde(x):
    # `kde(x)` returns a length-1 array for scalar x; take the scalar out so the
    # integrand returns a plain float.
    return np.square(stats.norm.pdf(x, 0, target_std) - kde(x)[0])

  loss, _ = quad(normal_minus_kde, -1, 1)

  if if_plot:
    x1 = np.linspace(np.min(errors), np.max(errors), num=10000)
    y1 = kde(x1)
    y_norm = stats.norm.pdf(x1, 0, target_std)

    plt.figure(figsize=(20, 4))
    plt.xticks(fontsize=10)
    plt.subplots_adjust(hspace=0.3)

    p1 = plt.subplot(211)
    # Vectorised: one KDE evaluation over the whole grid instead of 10000 scalar calls.
    p1.plot(x1, np.abs(y_norm - y1))
    plt.xlim(np.min(errors), np.max(errors))
    p1.title.set_text("Difference between Normal and Errors")

    p2 = plt.subplot(212)
    p2.plot(x1, y1, x1, y_norm, color='black', linewidth=1)
    p2.fill_between(x1, y1, y_norm, where=y_norm >= y1, facecolor='blue', interpolate=True)
    p2.fill_between(x1, y1, y_norm, where=y_norm <= y1, facecolor='red', interpolate=True)
    plt.xlim(np.min(errors), np.max(errors))

  return loss

def node_plus_delta(model, layer_n, wb_n, node_n, if_print=False, delta = 10**(-6)):
  """Add `delta` to a single parameter of the two-layer model, in place.

  The parameter is addressed by (layer_n, wb_n, node_n):
    (0, 0): entry [0][node_n] of the first layer's first variable
    (0, 1): entry [node_n] of the first layer's second variable
    (1, 0): row [node_n] of the second layer's first variable
    (1, 1): the whole second variable of the second layer (node_n is ignored)
  Any other (layer_n, wb_n) pair leaves the values unchanged, although the
  variable is still re-assigned.

  Args:
    model: object exposing `layers[layer_n].trainable_variables[wb_n]` with
      `numpy()` and `assign()`.
    layer_n: layer index.
    wb_n: index of the variable inside the layer (0 = first, 1 = second).
    node_n: index of the unit to perturb.
    if_print: when truthy, print the variable before and after the change.
    delta: amount to add.

  Returns:
    The same `model` object.
  """
  variable = model.layers[layer_n].trainable_variables[wb_n]
  current = variable.numpy()
  shift = np.zeros(current.shape)

  slot = (layer_n, wb_n)
  if slot == (0, 0):
    shift[0][node_n] = delta
  elif slot == (0, 1) or slot == (1, 0):
    shift[node_n] = delta
  elif slot == (1, 1):
    # Scalar shift: broadcast over the whole variable when added below.
    shift = delta

  updated = current + shift
  if if_print:
    print("\n OLD: model.layers[", layer_n, "].trainable_variables[", wb_n, "] with node_n as", node_n, ":\n", variable)
  variable.assign(updated)
  if if_print:
    print("\n NEW: model.layers[", layer_n, "].trainable_variables[", wb_n, "] with node_n as", node_n, ":\n", variable)

  return model

def node_minus_delta(model, layer_n, wb_n, node_n, if_print=False, delta = 10**(-6)):
  """Subtract `delta` from a single parameter of the two-layer model, in place.

  The parameter is addressed by (layer_n, wb_n, node_n):
    (0, 0): entry [0][node_n] of the first layer's first variable
    (0, 1): entry [node_n] of the first layer's second variable
    (1, 0): row [node_n] of the second layer's first variable
    (1, 1): the whole second variable of the second layer (node_n is ignored)
  Any other (layer_n, wb_n) pair leaves the values unchanged, although the
  variable is still re-assigned.

  Args:
    model: object exposing `layers[layer_n].trainable_variables[wb_n]` with
      `numpy()` and `assign()`.
    layer_n: layer index.
    wb_n: index of the variable inside the layer (0 = first, 1 = second).
    node_n: index of the unit to perturb.
    if_print: when truthy, print the variable before and after the change.
    delta: amount to subtract.

  Returns:
    The same `model` object.
  """
  variable = model.layers[layer_n].trainable_variables[wb_n]
  current = variable.numpy()
  shift = np.zeros(current.shape)

  slot = (layer_n, wb_n)
  if slot == (0, 0):
    shift[0][node_n] = delta
  elif slot == (0, 1) or slot == (1, 0):
    shift[node_n] = delta
  elif slot == (1, 1):
    # Scalar shift: broadcast over the whole variable when subtracted below.
    shift = delta

  updated = current - shift
  if if_print:
    print("\nmodel.layers[", layer_n, "].trainable_variables[", wb_n, "] with node_n as", node_n, ":\n", variable)
  variable.assign(updated)
  if if_print:
    print("\nmodel.layers[", layer_n, "].trainable_variables[", wb_n, "] with node_n as", node_n, ":\n", variable)
  return model

def node_plus_delta_loss(model, x, y, layer_n, wb_n, node_n, delta_n=1):
  """Return the distribution loss with one parameter shifted by `delta_n` deltas.

  The selected parameter is bumped `delta_n` times via `node_plus_delta` (each
  call adds that function's default delta), the loss is measured, and the
  variable is then restored to the value it had on entry.

  Restoring matters for the finite-difference estimate: without it every probe
  stays on the model, so later probes are evaluated at an already-perturbed
  point and `delta_n` no longer equals the real offset from the baseline.

  Args:
    model: the network whose parameter is probed.
    x, y: data used by `get_errors` to compute residuals.
    layer_n, wb_n, node_n: address of the parameter (see `node_plus_delta`).
    delta_n: number of delta increments to apply before measuring.

  Returns:
    The loss from `get_loss` at the perturbed parameter value.
  """
  variable = model.layers[layer_n].trainable_variables[wb_n]
  original_value = variable.numpy()
  try:
    for _ in range(delta_n):
      model = node_plus_delta(model, layer_n, wb_n, node_n, if_print=0)
    errors, _, _ = get_errors(model, x, y)
    loss = get_loss(errors)
  finally:
    # Undo the probe even if evaluation fails, so the model is left untouched.
    variable.assign(original_value)

  return loss

def check_node_loss(model, x, y, layer_n, wb_n, node_n, benchmark_loss):
  """Probe one parameter, enlarging the probe until the loss moves off the benchmark.

  Starts with `delta_n = 1`. While the measured loss is exactly equal to
  `benchmark_loss`, `delta_n` is increased and the loss re-measured: by 1 for
  the first 10 retries, then by 10, 100 and 1000 for each following group of
  10 retries (at most 40 retries in total).

  Args:
    model, x, y: forwarded to `node_plus_delta_loss`.
    layer_n, wb_n, node_n: address of the parameter being probed.
    benchmark_loss: reference loss the probe result is compared against.

  Returns:
    (node_loss, delta_n): the last measured loss and the `delta_n` used for it.
  """
  # (retry ceiling, amount added to delta_n per retry) for each escalation stage.
  schedule = ((10, 1), (20, 10), (30, 100), (40, 1000))

  delta_n = 1
  retries = 0
  node_loss = node_plus_delta_loss(model, x, y, layer_n, wb_n, node_n)

  for ceiling, increment in schedule:
    # A stage only runs while the loss is still stuck on the benchmark value;
    # once it moves, every remaining stage is skipped by the same condition.
    while benchmark_loss == node_loss and retries < ceiling:
      retries += 1
      delta_n += increment
      node_loss = node_plus_delta_loss(model, x, y, layer_n, wb_n, node_n, delta_n)

  return node_loss, delta_n

def nodes_plus_delta_loss(model, x, y, benchmark_loss):
  """Probe every parameter of the network with `check_node_loss`.

  The parameters are visited in four groups, in this order: layer 0 / variable 0
  (5 nodes), layer 0 / variable 1 (5 nodes), layer 1 / variable 0 (5 nodes),
  layer 1 / variable 1 (1 node).

  Args:
    model, x, y: forwarded to `check_node_loss`.
    benchmark_loss: reference loss forwarded to `check_node_loss`.

  Returns:
    (nodes_losses, nodes_delta_n): two lists of four lists each, holding the
    probed loss and the `delta_n` used for every parameter, grouped as above.
  """
  # (layer_n, wb_n, number of nodes probed)
  groups = ((0, 0, 5), (0, 1, 5), (1, 0, 5), (1, 1, 1))

  nodes_losses = []
  nodes_delta_n = []
  for layer_n, wb_n, node_count in groups:
    group_losses = []
    group_delta_n = []
    for node_n in range(node_count):
      loss, delta_n = check_node_loss(model, x, y, layer_n, wb_n, node_n, benchmark_loss=benchmark_loss)
      group_losses.append(loss)
      group_delta_n.append(delta_n)
    nodes_losses.append(group_losses)
    nodes_delta_n.append(group_delta_n)

  return nodes_losses,nodes_delta_n

def get_gradient(nodes_losses, nodes_delta_n, benchmark_loss, delta=10**(-6)):
  """Forward-difference gradient estimate for every probed parameter.

  For each parameter: (probed_loss - benchmark_loss) / (delta_n * delta), where
  `delta_n * delta` is the size of the probe step.

  The original divisor was written `delta_n**10**(-6)`. Because `**` binds
  right-to-left this is `delta_n ** 1e-6`, which is ~1 for any positive
  `delta_n`, so the loss difference was never divided by the step size.

  Args:
    nodes_losses: list of lists of probed losses (one inner list per group).
    nodes_delta_n: list of lists with the number of delta steps used per probe,
      same structure as `nodes_losses`.
    benchmark_loss: loss at the unperturbed parameters.
    delta: size of a single probe step (defaults to 1e-6).

  Returns:
    List of lists of gradient estimates with the same structure as `nodes_losses`.
  """
  gradients = []
  for group_losses, group_delta_n in zip(nodes_losses, nodes_delta_n):
    gradients.append([
        (loss - benchmark_loss)/(steps*delta)
        for loss, steps in zip(group_losses, group_delta_n)])

  return gradients

def update_nodes(model, grads, nodes_delta_n, if_print = False, lr = 0.001):
  """Apply one descent step to every parameter using `node_plus_delta`.

  Each parameter is moved by -(lr * grad * delta_n * 1e-6); note that the step
  is scaled by the probe size `delta_n * 1e-6` in addition to the learning rate.
  Group index i maps to layer 0 for i < 2 and layer 1 otherwise, and to
  variable index i % 2.

  Args:
    model: the network to update in place.
    grads: list of lists of gradient estimates (one inner list per group).
    nodes_delta_n: list of lists with the probe step counts, same structure.
    if_print: when truthy, print all four variables before and after the update.
    lr: learning rate.

  Returns:
    The same `model` object.
  """
  def dump_variables(header):
    print(header)
    print("model.layers[0].trainable_variables[0]:", model.layers[0].trainable_variables[0])
    print("model.layers[0].trainable_variables[1]:", model.layers[0].trainable_variables[1])
    print("model.layers[1].trainable_variables[0]:", model.layers[1].trainable_variables[0])
    print("model.layers[1].trainable_variables[1]:", model.layers[1].trainable_variables[1])

  if if_print:
    dump_variables("\n OLD:")

  for group_idx, group_grads in enumerate(grads):
    target_layer = 0 if group_idx<2 else 1
    target_variable = group_idx%2
    for node_idx, grad in enumerate(group_grads):
      step = lr*grad*nodes_delta_n[group_idx][node_idx]*10**(-6)
      # Descent: move against the gradient by passing the negated step.
      model = node_plus_delta(model, layer_n=target_layer, wb_n=target_variable, node_n=node_idx, delta = -step)

  if if_print:
    dump_variables("\n NEW:")

  return model

In [0]:
def train_dist(step_n=50, data_size=1000, lr=0.1, patience_init=20):
  """Fine-tune the benchmark network on the distribution loss by finite differences.

  Steps:
    1. Generate data and fit the MSE benchmark network.
    2. Repeatedly estimate the gradient of `get_loss` by probing each parameter,
       then update all parameters with `update_nodes`.
    3. Stop after `step_n` steps, or earlier once the loss has failed to beat
       its best value more than `patience_init` times since the last improvement.
    4. Plot loss, error variance and error mean per step.

  Fixes relative to the original:
    * the x-axis for the summary plots had one element more than the plotted
      series, which makes `plot` raise; it now has the same length;
    * the baseline loss passed to the gradient estimate is refreshed after every
      update instead of staying at the initial value;
    * the no-op `if_print` branch and two unused locals were removed.

  Args:
    step_n: maximum number of update steps.
    data_size: number of samples requested from `get_data`.
    lr: learning rate passed to `update_nodes`.
    patience_init: non-improving steps tolerated before stopping.

  Returns:
    (losses, errors_all, model): loss per step (initial value first), residual
    array per step, and the updated model.
  """
  x, y = get_data(data_size)
  model = get_benchmark_NN(x, y)
  print_model(model)
  errors, mean, sigma = get_errors(model, x, y, if_print=1)

  benchmark_loss = get_loss(errors, if_plot=1)
  print("\nInitial loss:", benchmark_loss)

  patience = patience_init
  losses = [benchmark_loss]
  errors_mses = [sigma**2]
  errors_means = [mean]
  errors_all = [errors]

  # Per-step printing/plotting is off; the original set this to 0 in both
  # branches of its per-step conditional.
  if_print = 0

  for i in range(step_n):
    print("-----Step", i)

    # estimate gradient
    nodes_losses, nodes_delta_n = nodes_plus_delta_loss(model, x, y, benchmark_loss)
    grads = get_gradient(nodes_losses, nodes_delta_n, benchmark_loss)
    model = update_nodes(model, grads, nodes_delta_n, if_print=if_print, lr=lr)

    # get errors and losses
    errors, mean, sigma = get_errors(model, x, y, if_print=if_print)
    new_loss = get_loss(errors, if_plot=if_print)
    if new_loss < np.min(losses):
      patience = patience_init
    else:
      patience -= 1

    # record results
    losses.append(new_loss)
    errors_mses.append(sigma**2)
    errors_means.append(mean)
    errors_all.append(errors)
    print("New loss:", new_loss, "    (learning rate:", lr, ")")

    # The next finite-difference estimate must be relative to the loss at the
    # parameters we have just moved to, not the loss before training started.
    benchmark_loss = new_loss

    if patience < 0:
      break

  print_model(model)

  # One x value per recorded step (index 0 is the state before any update).
  xs = np.arange(len(losses))

  plt.figure(dpi=100, figsize=(20, 2))
  plt.xticks(fontsize=10)
  plt.subplots_adjust(wspace=0.2)

  panels = (
      (131, losses, "Loss"),
      (132, errors_mses, "Errors MSE"),
      (133, errors_means, "Errors Mean"),
  )
  for position, series, label in panels:
    axis = plt.subplot(position)
    axis.plot(xs, series)
    plt.xlabel("step")
    plt.ylabel(label)

  return losses, errors_all, model
  
# Run the distribution-loss training for 2 steps and time it.
start_time = time.time()
losses, errors_all, model = train_dist(2)
print("--- %s seconds ---" % (time.time() - start_time))

# Persist the model.
# NOTE(review): the model file name says "1000epochs" while the errors file
# below says "5000epochs"; confirm which label matches this run.
model_name = 'model_4_NoOptimizer_1000epochs_distOnly.h5'
model.save(model_name)

# Write one residual array per step; `with` guarantees the file is closed even
# if a write fails.
with open('errors_all_0709_NoOptimizer_5000epochs.txt', 'w') as errors_file:
  for ers in errors_all:
    errors_file.write("\n")
    errors_file.write(str(ers))

print(losses)
print(errors_all)

x[:2]: [0.09762701 0.43037873]
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x7f57202aea58>
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 5)                 10        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 16
Trainable params: 16
Non-trainable params: 0
_________________________________________________________________


In [2]:
# Compare the residual distribution before (first entry) and after (last entry)
# the distribution-loss training.
for caption, step_index in (("Before training", 0), ("After training", -1)):
  print(caption)
  plot_errors(errors_all[step_index])

After training


NameError: ignored

After training


NameError: ignored

In [0]:
# Mount Google Drive at /content/gdrive. `google.colab` is imported here, so
# this cell needs an environment that provides that package.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
%cd /content/gdrive/My\ Drive/Colab\ Notebooks/
!pwd
file = open('errors_all_0709_4_NoOptimizer.txt', 'w')
for ers in errors_all:
  file.write("\n")
  file.write(str(ers))
file.close()

/content/gdrive/My Drive/Colab Notebooks
/content/gdrive/My Drive/Colab Notebooks


In [0]:
# Install h5py and pyyaml (presumably for saving/loading Keras models in HDF5
# format - confirm), then display the TensorFlow version in use.
# NOTE(review): `tf` is not imported in this cell; it relies on the import cell
# at the top of the notebook having been run.
!pip install -q h5py pyyaml
from __future__ import absolute_import, division, print_function, unicode_literals
import os
tf.__version__

In [0]:
# Reload a saved model from disk and print its architecture.
# NOTE(review): this loads '..._2000epochs_...' whereas the training cell in
# this notebook saves '..._1000epochs_...'; presumably this file comes from a
# different run - confirm it exists in the working directory.
new_model = tf.keras.models.load_model('model_4_NoOptimizer_2000epochs_distOnly.h5')
new_model.summary()

Model: "sequential_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_28 (Dense)             (None, 5)                 10        
_________________________________________________________________
dense_29 (Dense)             (None, 1)                 6         
Total params: 16
Trainable params: 16
Non-trainable params: 0
_________________________________________________________________


In [0]:
# NOTE(review): stray cell - `a` is not defined anywhere in the visible
# notebook, so evaluating it would raise NameError; likely safe to delete.
a