# Importing Dependencies

In [None]:
!pip install flask-ngrok
!pip install pyngrok

In [None]:
from flask_ngrok import run_with_ngrok
from flask import Flask, request, send_file
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from keras import Model, optimizers
from keras.models import load_model
from keras.layers import *
from keras.utils import plot_model
from datetime import datetime
import os
import sys
import json
from google.colab import drive, userdata
import time
import tempfile
import threading
from pyngrok import ngrok
from collections import deque
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import traceback
import re
# SECURITY: the ngrok auth token must never be committed in the notebook.
# It is read from Colab's secret store instead (add a secret named
# NGROK_AUTHTOKEN in the Colab "Secrets" panel and grant this notebook access).
# Any token that was previously hardcoded here should be revoked in the ngrok dashboard.
ngrok.set_auth_token(userdata.get('NGROK_AUTHTOKEN'))
# Mount Google Drive; models, logs and the public URL file are stored under it.
drive.mount('/content/gdrive')

# Creating Model Class

In [None]:
class Model_builder :
  """Builds, loads, trains and persists the base / actor / critic Keras models of one model id.

  Three sub-models are kept separately (``base_model``, ``actor_model``,
  ``critic_model``) together with a merged ``model`` that maps
  (position planes, legal-move mask) to (value, policy). Training metrics are
  accumulated in the ``logs`` DataFrame. Everything is stored on the mounted
  Google Drive under ``CHESS-AI/<id>/``.
  """

  def _path(self, id, *parts) :
    """Return the Drive path of ``parts`` inside the storage directory of model ``id``."""
    return os.path.join('/content/gdrive', 'My Drive', 'CHESS-AI', id, *parts)

  def __init__(self, id, isNew = True) :
    """Create a fresh model (and save it) or load an existing one from Drive.

    Args:
      id: name of the model directory under ``CHESS-AI`` on Drive.
      isNew: when True the networks are built from scratch and saved
        immediately; otherwise the three sub-models and ``logs.csv`` are loaded.
    """
    print(tf.config.list_physical_devices('GPU'))
    tf.keras.utils.set_random_seed(13)
    # Number of optimizer variables seen at the last load/save, keyed by optimizer name.
    self.optm_vars = {}
    if isNew :
      self.build_model()
      self.logs = pd.DataFrame({'Actor Loss': [],
                                'Critic Loss': [],
                                'Entropy' : [],
                                'Total Loss' : []
                              })
    else :
      self.base_model = load_model(self._path(id, 'base_model.keras'))
      self.actor_model = load_model(self._path(id, 'actor_model.keras'))
      self.critic_model = load_model(self._path(id, 'critic_model.keras'))
      self.compile_model()
      stored_logs = pd.read_csv(self._path(id, 'logs.csv'))
      # Log files written with the index column come back with "Unnamed: N"
      # columns; drop them so they do not pile up across save/load cycles.
      self.logs = stored_logs.loc[:, ~stored_logs.columns.str.startswith('Unnamed')]
    # Optimizers always start fresh: restoring their state through
    # load_optimizers() is currently not wired in.
    self.base_optimizer = optimizers.Adam()
    self.actor_optimizer = optimizers.Adam()
    self.critic_optimizer = optimizers.Adam()
    if isNew :
      self.save_model(id)

  def build_model(self) :
    """Construct the base, critic and actor networks, then the merged model.

    The layer creation order is kept fixed because auto-generated layer names
    and seeded initial weights depend on it.
    """
    # Designing NN Architecture
    # --- Shared trunk: (8,8,60) input -> (8,8,128) feature map ---
    inp_base = Input(shape = [8,8,60], name = "base_input", dtype = tf.float32)
    base1 = Conv2D(256,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(inp_base)
    base1 = ELU(alpha=0.1)(base1)
    base = BatchNormalization(dtype = tf.float32)(base1)
    base = Conv2D(256,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base = ELU(alpha=0.1)(base)
    # Every 256-channel block adds back the output of the first conv (base1).
    base = Add(dtype = tf.float32)([base, base1])
    base = BatchNormalization(dtype = tf.float32)(base)
    base = Conv2D(256,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base = ELU(alpha=0.1)(base)
    base = Add(dtype = tf.float32)([base, base1])
    base = BatchNormalization(dtype = tf.float32)(base)
    base = Conv2D(256,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base = ELU(alpha=0.1)(base)
    base = Add(dtype = tf.float32)([base, base1])
    base = BatchNormalization(dtype = tf.float32)(base)
    base = Conv2D(256,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base = ELU(alpha=0.1)(base)
    base = Add(dtype = tf.float32)([base, base1])
    base = BatchNormalization(dtype = tf.float32)(base)
    base1 = Conv2D(128,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base1 = ELU(alpha=0.1)(base1)
    base = BatchNormalization(dtype = tf.float32)(base1)
    base = Conv2D(128,(3,3), padding = "same", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(base)
    base = ELU(alpha=0.1)(base)
    base = Add(dtype = tf.float32)([base, base1])
    base_head = BatchNormalization(name="bn4", dtype = tf.float32)(base) #8,8,128
    self.base_model = Model(inputs= inp_base, outputs=base_head, name = "base_model")

    # --- Critic head: (8,8,128) features -> single tanh value (float64 output) ---
    inp_value = Input(shape = [8,8,128], name = "Value_Inp", dtype = tf.float32)
    val = Conv2D(64, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(inp_value)
    val = ELU(alpha=0.1)(val)
    # NOTE(review): this normalization is float64 while its neighbours are float32 - confirm this is intended.
    val = BatchNormalization(dtype = tf.float64)(val)
    val = Flatten()(val)
    val1 = Dense(256, dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    val1 = ELU(alpha=0.1)(val1)
    val = BatchNormalization(dtype = tf.float32)(val1)
    val = Dense(256, name="v1", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    val = ELU(alpha=0.1)(val)
    val = Add(dtype = tf.float32)([val, val1])
    val = BatchNormalization( name="v2", dtype = tf.float32)(val)
    val1 = Dense(128, name="v5", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    val1 = ELU(alpha=0.1)(val1)
    val = BatchNormalization( name="v6", dtype = tf.float32)(val1)
    val = Dense(128, name="v7", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    val = ELU(alpha=0.1)(val)
    val = Add(dtype = tf.float32)([val, val1])
    val = BatchNormalization(name="v8", dtype = tf.float32)(val)
    val = Dense(64, name="v9", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    val = ELU(alpha=0.1)(val)
    val = BatchNormalization( name="v10", dtype = tf.float32)(val)
    value_head = Dense(1, activation = "tanh", name="v_head", dtype = tf.float64, kernel_initializer = tf.keras.initializers.GlorotNormal())(val)
    self.critic_model = Model(inputs= inp_value, outputs=value_head, name = "critic_model")

    # --- Actor head: features + (8,8,10) move mask -> softmax over 640 entries (float64) ---
    inp_policy = Input(shape = [8,8,128], name = "Policy_Inp1", dtype = tf.float32)
    moves_mask = Input(shape = [8,8,10], name = "Policy_Inp2", dtype = tf.float32)
    policy = Conv2D(256, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(inp_policy)
    policy = ELU(alpha=0.1)(policy)
    policy = BatchNormalization(dtype = tf.float32)(policy)
    policy1 = Conv2D(128, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy1 = ELU(alpha=0.1)(policy1)
    policy = BatchNormalization(dtype = tf.float32)(policy1)
    policy = Conv2D(128, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy = ELU(alpha=0.1)(policy)
    policy = Add(dtype = tf.float32)([policy,policy1])
    policy = BatchNormalization(dtype = tf.float32)(policy)
    policy1 = Conv2D(64, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy1 = ELU(alpha=0.1)(policy1)
    policy = BatchNormalization(dtype = tf.float32)(policy1)
    policy = Conv2D(64, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy = ELU(alpha=0.1)(policy)
    policy = Add(dtype = tf.float32)([policy,policy1])
    policy = BatchNormalization(dtype = tf.float32)(policy)
    policy1 = Conv2D(32, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy1 = ELU(alpha=0.1)(policy1)
    policy = BatchNormalization(dtype = tf.float32)(policy1)
    policy = Conv2D(32, (3, 3), padding='same', dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy = ELU(alpha=0.1)(policy)
    policy = Add(dtype = tf.float32)([policy,policy1])
    policy = BatchNormalization(dtype = tf.float32)(policy)
    policy = Conv2D(10, (3, 3), padding='same', name = "p19", dtype = tf.float32, kernel_initializer = tf.keras.initializers.GlorotNormal())(policy)
    policy = ELU(alpha=0.1)(policy)
    policy = BatchNormalization( name = "p20", dtype = tf.float64)(policy)
    # The mask is applied multiplicatively BEFORE the softmax.
    # NOTE(review): entries multiplied by 0 still receive exp(0) weight in the
    # softmax, so masked moves keep non-zero probability - confirm this is intended.
    policy = Multiply(name = "pmul", dtype = tf.float64)([policy, moves_mask])
    policy = Flatten()(policy)
    policy_head = Softmax(name="p_head", dtype = tf.float64)(policy)
    self.actor_model = Model(inputs= [inp_policy, moves_mask], outputs=policy_head, name = "actor_model")
    self.compile_model()

  def compile_model(self) :
    """(Re)build ``self.model``: inputs (position, mask) -> outputs (value, policy).

    The sub-models are shared by reference, so the merged model always uses
    their current weights.
    """
    inp1_merged = Input(shape = [8,8,60], name = "PositionMask", dtype = tf.float32)
    inp2_merged = Input(shape = [8,8,10], name = "LegalMovesMask", dtype = tf.float32)
    # Run the shared trunk once and feed the same features to both heads
    # instead of evaluating the base network twice on the same input.
    shared_features = self.base_model(inp1_merged)
    value_output = self.critic_model(shared_features)
    policy_output = self.actor_model([shared_features, inp2_merged])
    self.model = Model(inputs= (inp1_merged, inp2_merged), outputs=(value_output, policy_output))

  def update_model(self, inputs, masks, values, actions, id, config) :
    """Train the three sub-models on one batch of collected data.

    Args:
      inputs, masks, values, actions: training data, sliced along the first
        axis by ``tf.data.Dataset.from_tensor_slices``.
      id: unused here; kept for interface compatibility.
      config: dict with ``base_learning_rate``, ``actor_learning_rate``,
        ``critic_learning_rate``, ``batch_size`` and the loss coefficients
        read by ``compute_loss``. An optional ``epochs`` entry overrides the
        default of 12 passes over the data.

    A failing training step is logged and skipped (best effort); the merged
    model is rebuilt at the end.
    """
    print("Model Update Begins")
    tf.keras.utils.set_random_seed(13)
    self.base_optimizer.learning_rate.assign(config["base_learning_rate"])
    self.actor_optimizer.learning_rate.assign(config["actor_learning_rate"])
    self.critic_optimizer.learning_rate.assign(config["critic_learning_rate"])
    epochs = config.get("epochs", 12)
    train_dataset = tf.data.Dataset.from_tensor_slices((inputs, masks, values, actions))
    for epoch in range(epochs) :
      train_dataset_batches = train_dataset.shuffle(buffer_size=32000).batch(config["batch_size"])
      for step, (inputs_batch, masks_batch, values_batch, actions_batch) in enumerate(train_dataset_batches) :
        try :
          # One (non-persistent) tape per sub-model, each queried exactly once below.
          with tf.GradientTape() as base_tape, tf.GradientTape() as actor_tape, tf.GradientTape() as critic_tape:
            base_out = self.base_model(inputs = inputs_batch, training = True)
            value_predictions = self.critic_model(inputs = base_out, training = True)
            action_probs = self.actor_model(inputs = (base_out, masks_batch), training = True)
            actor_loss, critic_loss, total_loss = self.compute_loss(values_batch, tf.squeeze(value_predictions), action_probs, actions_batch, config)

          # The trunk is trained on the combined loss, each head on its own loss.
          gradients_base = base_tape.gradient(total_loss, self.base_model.trainable_variables)
          gradients_actor = actor_tape.gradient(actor_loss, self.actor_model.trainable_variables)
          gradients_critic = critic_tape.gradient(critic_loss, self.critic_model.trainable_variables)

          self.base_optimizer.apply_gradients(zip(gradients_base, self.base_model.trainable_variables))
          self.actor_optimizer.apply_gradients(zip(gradients_actor, self.actor_model.trainable_variables))
          self.critic_optimizer.apply_gradients(zip(gradients_critic, self.critic_model.trainable_variables))

          print(("Epochs -", epoch+1, "/", epochs, "step -", step, "batch_loss -",tf.get_static_value(total_loss)))
          sys.stdout.flush()
        except Exception:
          # print_exc() already writes the traceback and returns None, so it
          # must not be wrapped in print().
          traceback.print_exc()
    del train_dataset
    self.compile_model()

  def compute_loss(self, returns, value_predictions, action_probs, actions, config):
    """Compute the actor, critic and combined losses and append them to ``self.logs``.

    Args:
      returns: target values for the critic.
      value_predictions: critic outputs (already squeezed by the caller).
      action_probs: actor softmax output.
      actions: multiplied element-wise with the log-probabilities and summed
        over axis 1 (presumably one-hot selected actions - verify against caller).
      config: dict providing ``actor_coefficient``, ``critic_coefficient``
        and ``entropy_coefficient``.

    Returns:
      Tuple ``(actor_loss - entropy_coefficient * entropy, critic_loss, total_loss)``.
    """
    advantages = tf.subtract(tf.cast(returns, dtype=tf.float64), value_predictions)
    critic_loss = tf.losses.mean_squared_error(returns, value_predictions)
    # 1e-10 guards against log(0).
    action_log_probs = tf.math.log(action_probs + 1e-10)
    # NOTE(review): this is sum(p * log p), i.e. the NEGATIVE of the Shannon
    # entropy, and the actor term below is +log_prob * advantage. Standard
    # advantage actor-critic uses the opposite sign for both; confirm the signs
    # are compensated elsewhere (e.g. in how values/actions are produced).
    entropy = tf.reduce_mean(tf.reduce_sum(action_probs * action_log_probs, axis = 1))
    selected_action_log_probs = tf.reduce_sum(action_log_probs * tf.cast(actions, dtype=tf.float64), axis = 1)
    actor_loss = tf.reduce_mean(tf.multiply(selected_action_log_probs, advantages))
    total_loss = config["actor_coefficient"] * actor_loss + config["critic_coefficient"] * critic_loss - config["entropy_coefficient"] * entropy

    # ignore_index=True already yields a clean 0..n-1 index, so no extra
    # reset_index is needed afterwards.
    self.logs = pd.concat([self.logs, pd.DataFrame({'Actor Loss': [tf.get_static_value(actor_loss)],
                                'Critic Loss': [tf.get_static_value(critic_loss)],
                                'Entropy' : [tf.get_static_value(entropy)],
                                'Total Loss' : [tf.get_static_value(total_loss)]
                              })], ignore_index= True)
    return ((actor_loss - config["entropy_coefficient"] * entropy), critic_loss, total_loss)

  def get_piece_to_value(self, color, inp = True) :
    """Return the piece-letter -> index mapping as seen from ``color``.

    Args:
      color: 1 puts the upper-case pieces first; any other value puts the
        lower-case pieces first.
      inp: True returns indices 0-11 for all twelve piece letters (own six
        first); False returns only the six own pieces with the indices
        0, 1, 3, 5, 7, 8.
    """
    if color == 1 :
      own, other = "PNBRQK", "pnbrqk"
    else :
      own, other = "pnbrqk", "PNBRQK"
    if inp :
      return {piece: index for index, piece in enumerate(own + other)}
    return dict(zip(own, (0, 1, 3, 5, 7, 8)))

  def load_optimizers(self, id, name) :
    """Rebuild an Adam optimizer from ``<name>_optimizer_weights.json`` of model ``id``.

    Relies on the optimizer's private ``_variables`` list and ``add_variable``;
    this depends on the installed Keras version. Currently not called from
    ``__init__``.
    """
    optm = optimizers.Adam()
    optm._variables = []
    with open(self._path(id, name+'_optimizer_weights.json'), 'r') as file:
      json_string = file.read()
    optm_dict = json.loads(json_string)
    for i in range(len(optm_dict["data"])):
      optm.add_variable(optm_dict["data"][i]["shape"], tf.dtypes.as_dtype(optm_dict["data"][i]["dtype"]), name = optm_dict["data"][i]["name"])
      optm.variables[i].assign(optm_dict["data"][i]["value"])
    self.optm_vars[name] = len(optm_dict["data"])
    return optm

  def save_model(self, id) :
    """Write the merged model, the three sub-models, the logs and the optimizer states to Drive."""
    print("Model Saving Process Initiated")
    # Creating the backup folder also creates the model directory itself.
    os.makedirs(self._path(id, "Exploration_Backup"), exist_ok=True)
    self.model.save(self._path(id, 'model.h5'), save_format = "h5")
    self.model.save(self._path(id, 'model.keras'), save_format = "keras")
    self.base_model.save(self._path(id, 'base_model.keras'), save_format = "keras")
    self.actor_model.save(self._path(id, 'actor_model.keras'), save_format = "keras")
    self.critic_model.save(self._path(id, 'critic_model.keras'), save_format = "keras")
    # index=False: otherwise read_csv brings the index back as an extra
    # "Unnamed: 0" column on every reload.
    self.logs.to_csv(self._path(id, 'logs.csv'), index=False)
    self.save_optimizers(id, self.base_optimizer, "base")
    self.save_optimizers(id, self.actor_optimizer, "actor")
    self.save_optimizers(id, self.critic_optimizer, "critic")
    print("Model Saving Process Completed")

  def save_optimizers(self, id, optimizer, name) :
    """Dump every variable of ``optimizer`` (shape, dtype, name, value) to a JSON file on Drive."""
    optm_dict = {
        "data" : []
    }
    for v in optimizer.variables :
      optm_dict["data"].append({
          "shape" : v.shape.as_list(),
          # Plain dtype name such as "float32" / "int64".
          "dtype" : tf.dtypes.as_dtype(v.dtype).name,
          "name" : v.name.split(":")[0],
          "value" : v.numpy().tolist()
      })
    with open(self._path(id, name+'_optimizer_weights.json'), 'w') as drive_file:
      drive_file.write(json.dumps(optm_dict, indent=2))
    # optm_vars has no entry before the first load/save of this optimizer;
    # .get() avoids the KeyError that would otherwise abort the first save.
    print(("original optimisers vars length", self.optm_vars.get(name), "new optimisers vars length", len(optm_dict["data"])))
    self.optm_vars[name] = len(optm_dict["data"])

  def get_model_summary(self,visualize = True) :
    """Return a ``plot_model`` diagram of the merged model, or its text summary when ``visualize`` is False."""
    if(visualize) :
      return plot_model(self.model,show_shapes = True, show_dtype=True, expand_nested=True)
    else :
      return self.model.summary()
#Model_builder("vjvu").get_model_summary(False)

# Setting Up Server

In [None]:
# Shared server state, used by the Flask routes and the background worker.
# They start as empty containers (not None) so that a request arriving before
# the worker thread has initialised them does not fail on `id in model`.
model = {}                  # model id -> Model_builder instance
queue = deque()             # pending training jobs, consumed by process_update()
completed_updates = set()   # ack ids of finished training jobs
app= Flask(__name__)
run_with_ngrok(app)

@app.route("/ready-model/<id>", methods = ["GET"])
def load(id):
  """Make a model available in memory and return its id.

  The literal id "None" requests a brand-new model, which is named after the
  current timestamp. Any other id is loaded through Model_builder unless it is
  already present in the in-memory registry.
  """
  global model
  if id != "None" :
    if id not in model :
      model[id]= Model_builder(id, False)
      print("Loaded Model with id - " + id)
    else :
      print(f"Model with id {id} is already loaded and ready to be used")
    return id

  # Fresh model: derive its id from the creation time.
  id = datetime.now().strftime("%d_%m_%Y-%H_%M_%S")
  model[id] = Model_builder(id)
  print("Created Model with id - " + id)
  return id

@app.route("/download-model/<id>", methods = ["GET"])
def return_model(id):
  """Send the merged model of ``id`` as a ``.keras`` attachment.

  Returns ``("Error", 400)`` when the id is not loaded (same convention as
  the /train route) instead of failing with a KeyError.
  """
  from flask import after_this_request

  if not model or id not in model :
    return "Error", 400

  # Reserve a file name, then close the handle BEFORE Keras writes to that path.
  temp_file = tempfile.NamedTemporaryFile(delete=False, suffix = ".keras")
  temp_file.close()
  try :
    model[id].model.save(temp_file.name, save_format = "keras")
  except Exception :
    os.remove(temp_file.name)
    raise

  @after_this_request
  def _remove_temp_file(response) :
    # send_file has already opened the file, so unlinking it here (POSIX)
    # only removes the directory entry; this avoids leaking one temp file
    # per download.
    try :
      os.remove(temp_file.name)
    except OSError :
      traceback.print_exc()
    return response

  return send_file(temp_file.name, as_attachment=True)

@app.route("/train/<id>", methods = ["POST"])
def update(id) :
  """Queue a training job for model ``id`` and return an acknowledgement id.

  The JSON body must provide "inputs", "masks", "values", "actions" and
  "config". Unknown model ids are rejected with ("Error", 400).
  """
  global model
  global queue
  if not (id in model) :
    return "Error", 400
  payload = request.get_json()
  # The acknowledgement id is the enqueue time down to microseconds.
  ack_id = datetime.now().strftime("%H_%M_%S_%f")
  job = (
      id,
      payload["inputs"],
      payload["masks"],
      payload["values"],
      payload["actions"],
      payload["config"],
      ack_id,
  )
  queue.append(job)
  print(("Appending Queue... Final size is - ", len(queue)))
  return ack_id

@app.route("/check-update-status/<ack_id>", methods = ["GET"])
def check(ack_id):
  """Report whether the training job ``ack_id`` has finished.

  A finished job is reported as "Done" exactly once: its ack id is removed
  from the completed set when it is reported.
  """
  global completed_updates
  if ack_id not in completed_updates :
    return "Not Done"
  completed_updates.remove(ack_id)
  return "Done"

@app.route("/save-model/<id>", methods = ["GET"])
def save(id):
  """Persist model ``id`` to Drive.

  Returns ``("Error", 400)`` when the id is not loaded (same convention as
  the /train route) instead of failing with a KeyError.
  """
  if not model or id not in model :
    return "Error", 400
  model[id].save_model(id)
  return "Model Saved Successfully"

def process_update() :
  """Background worker: initialise the shared state, then train queued jobs forever.

  Each job is the tuple queued by the /train route:
  (model id, inputs, masks, values, actions, config, ack id). After a job
  succeeds its ack id is published in ``completed_updates``; the model is
  saved whenever the queue has been drained.

  A failing job is logged and skipped so that one bad request (or a failed
  save) does not silently kill the worker thread. Its ack id is NOT marked
  as completed.
  """
  global queue
  global model
  global completed_updates
  model = {}
  queue = deque()
  completed_updates = set()
  while True:
    if not queue :
      # Nothing to do; poll again shortly.
      time.sleep(1)
      continue

    model_id, inputs, masks, values, actions, config, ack_id = queue.popleft()
    print("Starting Update for ID - "+ model_id)
    try :
      model[model_id].update_model(inputs, masks, values, actions, model_id, config)
      completed_updates.add(ack_id)
      if(len(queue)==0) :
        model[model_id].save_model(model_id)
    except Exception :
      traceback.print_exc()

# Setting Up Live Plotting



In [None]:
# @title Live Plotting Code {display-mode: "form"}
# Output widget that receives every redraw of the live loss plot; it is shown
# once here, underneath a bold caption.
plot_output = widgets.Output(layout=dict(border='1px solid black'))
display(
    widgets.VBox(children=[
        widgets.HTML(value="<b>Live Plot:</b>"),
        plot_output,
    ])
)

def plot_metrics(model) :
  """Refresh the live plot every 4 seconds, forever (meant to run in a thread).

  ``model`` is the id -> Model_builder mapping. The subplot grid is rebuilt
  whenever the number of models differs from the number drawn last time;
  otherwise the existing axes are redrawn in place.
  """
  clear_output(wait = True)
  fig = plt.figure(figsize = (16,8))
  drawn_models = 0
  while True :
    if model :
      if drawn_models != len(model) :
        drawn_models = draw_plots(model, fig)
      else :
        update_plots(model, fig)
    time.sleep(4)

def _plot_model_logs(ax, logs) :
  """Redraw the four loss curves of one model on ``ax``; empty logs leave ``ax`` untouched."""
  if logs.empty :
    return
  ax.clear()
  for column in ("Actor Loss", "Critic Loss", "Entropy", "Total Loss") :
    ax.plot(logs[column], label = column)
  ax.legend(loc = 'upper right')

def draw_plots(model, fig) :
  """Rebuild ``fig`` with one subplot per model and display it in ``plot_output``.

  Args:
    model: mapping of model id -> object exposing a ``logs`` DataFrame.
    fig: figure to clear and repopulate.

  Returns:
    The number of models, so the caller can detect when the layout must be rebuilt.
  """
  fig.clf()
  with plot_output:
    clear_output(wait = True)
    keys = list(model.keys())
    for i in range(len(model)) :
      ax = fig.add_subplot(1, len(model), i+1)
      # Read `logs` once: the training thread replaces the DataFrame while it
      # runs, so a single snapshot keeps the four curves consistent.
      _plot_model_logs(ax, model[keys[i]].logs)
    display(fig)
  return len(model)

def update_plots(model, fig) :
  """Redraw the existing axes of ``fig`` (one per model, in key order) and display it again."""
  with plot_output:
    clear_output(wait = True)
    keys = list(model.keys())
    for i, ax in enumerate(fig.get_axes()):
      _plot_model_logs(ax, model[keys[i]].logs)
    display(fig)

# Running Server

In [None]:
# Start the training worker, open the ngrok tunnel, publish its URL on Drive
# and serve the Flask app (app.run() blocks until the server stops).
try:
  threading.Thread(target=process_update, daemon = True).start()
  #threading.Thread(target=plot_metrics, args = [model], daemon = True).start()
  url = ngrok.connect(5000).public_url
  # Make sure the target folder exists before writing the URL file.
  os.makedirs(os.path.join('/content/gdrive', 'My Drive', 'CHESS-AI'), exist_ok=True)
  with open(os.path.join('/content/gdrive', 'My Drive', 'CHESS-AI', 'url.txt'), 'w') as file:
    file.write(url)
  app.run()
except Exception:
  # Show the full traceback, not just the message, then drop the shared state.
  traceback.print_exc()
  del model
  del completed_updates
finally:
  # Always close the tunnel, including when the cell is interrupted
  # (KeyboardInterrupt is not an Exception and would skip the handler above).
  ngrok.kill()

In [None]:
# Manual cleanup cell: run it to kill the ngrok process (and with it the
# tunnel) if the server cell was stopped without reaching its own cleanup.
ngrok.kill()