In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random

from tensorflow.python.ops import tensor_array_ops, control_flow_ops



In [None]:
# @title GenLoader Code
#Generator loader Code

class gen_Data_loader():
  """Reads whitespace-separated numeric sequences and serves them in batches.

  Only lines holding exactly `seq_length` values are kept; the tail that does
  not fill a whole batch is dropped.

  Args:
    batch_size: number of sequences per batch.
    seq_length: required number of values per line (defaults to 20).
  """

  def __init__(self, batch_size, seq_length=20):
    self.batch_size = batch_size
    self.seq_length = seq_length
    self.token_stream = []
    # Defined up-front so the loader is in a valid (empty) state before
    # create_batches() has been called.
    self.sequence_batch = []
    self.num_batch = 0
    self.pointer = 0

  @staticmethod
  def _parse_value(token):
    """Parse one field: int when possible, otherwise float."""
    try:
      return int(token)
    except ValueError:
      return float(token)

  def create_batches(self, data_file):
    """Load `data_file` and split its valid lines into equal-sized batches.

    When the file holds fewer than `batch_size` valid lines, `num_batch` is 0
    and `sequence_batch` is an empty list (np.split cannot split into 0 parts).
    """
    self.token_stream = []
    with open(data_file, "r") as f:
      for line in f:
        # str.split() with no argument already ignores surrounding whitespace.
        parse_line = [self._parse_value(x) for x in line.split()]
        if len(parse_line) == self.seq_length:
          self.token_stream.append(parse_line)
    self.num_batch = len(self.token_stream) // self.batch_size
    self.token_stream = self.token_stream[:self.num_batch * self.batch_size]
    if self.num_batch:
      self.sequence_batch = np.split(np.array(self.token_stream), self.num_batch, 0)
    else:
      self.sequence_batch = []
    self.pointer = 0

  def next_batch(self):
    """Return the current batch and advance the pointer, wrapping at the end.

    Raises:
      ValueError: if no batches have been created.
    """
    if not self.num_batch:
      raise ValueError("no batches available: call create_batches() with at least batch_size valid lines")
    ret = self.sequence_batch[self.pointer]
    self.pointer = (self.pointer + 1) % self.num_batch
    return ret

  def reset_pointer(self):
    """Restart iteration from the first batch."""
    self.pointer = 0


#Needed for pretraining the Generator


In [None]:
# @title DisLoader Code
#Discriminator loader Code


class Dis_dataloader():
  """Builds shuffled, labelled batches of real and generated sequences.

  Positive (real) sequences are labelled [0, 1]; negative (generated)
  sequences are labelled [1, 0].

  Args:
    batch_size: number of sequences per batch.
  """

  def __init__(self, batch_size):
    self.batch_size = batch_size
    self.sentences = np.array([])
    self.labels = np.array([])
    # Defined up-front so the loader is in a valid (empty) state before
    # load_train_data() has been called.
    self.sentences_batches = []
    self.labels_batches = []
    self.num_batch = 0
    self.pointer = 0

  @staticmethod
  def _parse_value(token):
    """Parse one field: int when possible, otherwise float."""
    try:
      return int(token)
    except ValueError:
      return float(token)

  def _read_examples(self, path, required_length=None):
    """Read whitespace-separated sequences from `path`.

    Blank lines are skipped. When `required_length` is given, lines with a
    different number of values are skipped as well.
    """
    examples = []
    with open(path) as fin:
      for line in fin:
        parse_line = [self._parse_value(x) for x in line.split()]
        if not parse_line:
          continue
        if required_length is not None and len(parse_line) != required_length:
          continue
        examples.append(parse_line)
    return examples

  def load_train_data(self, positive_file, negative_file):
    """Load both files, label, shuffle and split into batches.

    The batching runs once, after both files have been read completely.
    Examples that do not fill a whole batch are dropped; with fewer than
    `batch_size` examples in total, `num_batch` is 0 and the batch lists
    are empty.
    """
    # As before, only the negative file is filtered on a length of 20.
    positive_examples = self._read_examples(positive_file)
    negative_examples = self._read_examples(negative_file, required_length=20)

    self.sentences = np.array(positive_examples + negative_examples)
    # Building the labels from one list avoids concatenating an empty list
    # with a 2-D one, which numpy rejects.
    self.labels = np.array(
        [[0, 1]] * len(positive_examples) + [[1, 0]] * len(negative_examples))

    # Shuffle the data
    shuffle_indices = np.random.permutation(np.arange(len(self.labels)))
    self.sentences = self.sentences[shuffle_indices]
    self.labels = self.labels[shuffle_indices]

    # Split batches
    self.num_batch = len(self.labels) // self.batch_size
    self.sentences = self.sentences[:self.num_batch * self.batch_size]
    self.labels = self.labels[:self.num_batch * self.batch_size]
    if self.num_batch:
      self.sentences_batches = np.split(self.sentences, self.num_batch, 0)
      self.labels_batches = np.split(self.labels, self.num_batch, 0)
    else:
      self.sentences_batches = []
      self.labels_batches = []

    self.pointer = 0

  def next_batch(self):
    """Return the current (sentences, labels) pair and advance the pointer.

    Raises:
      ValueError: if no batches have been created.
    """
    if not self.num_batch:
      raise ValueError("no batches available: call load_train_data() with at least batch_size examples")
    ret = self.sentences_batches[self.pointer], self.labels_batches[self.pointer]
    self.pointer = (self.pointer + 1) % self.num_batch
    return ret

  def reset_pointer(self):
    """Restart iteration from the first batch."""
    self.pointer = 0




In [None]:
# @title Discriminator Code
#Discriminator Code



#Linear layer code




def linear(input__, output_size, scope=None):
  """Affine projection of a 2-D tensor: input__ @ Matrix^T + Bias.

  Args:
    input__: tensor whose static shape is [batch, input_size].
    output_size: width of the projected output.
    scope: variable scope holding "Matrix" and "Bias"; "SimpleLinear" if falsy.

  Returns:
    Tensor with `output_size` columns.

  Raises:
    ValueError: if the input is not rank 2 or its second dimension is unknown.
  """
  dims = input__.get_shape().as_list()
  rank = len(dims)
  if rank != 2:
        raise ValueError("Linear is expecting 2D arguments: %s" % str(dims))
  in_features = dims[1]
  if not in_features:
        raise ValueError("Linear expects shape[1] of arguments: %s" % str(dims))

  scope_name = scope or "SimpleLinear"
  with tf.variable_scope(scope_name):
    # Stored as [output, input], hence the transpose before the matmul.
    weights = tf.get_variable("Matrix", [output_size, in_features], dtype=input__.dtype)
    offset = tf.get_variable("Bias", [output_size], dtype=input__.dtype)

  projected = tf.matmul(input__, tf.transpose(weights))
  return projected + offset


def highway(input__, size, num_layers=1, bias=-2.0, f = tf.nn.relu, scope="Highway"):
    """Stack of highway layers: y = t * f(W x + b) + (1 - t) * x.

    Args:
      input__: 2-D tensor fed to the first layer.
      size: output width passed to every `linear` projection. The carry term
        adds the layer input to the output, so it has to match the input width.
      num_layers: number of highway layers; 0 returns `input__` unchanged.
      bias: constant added to the gate pre-activation before the sigmoid.
      f: activation applied to the transform branch.
      scope: variable scope that holds all layers.

    Returns:
      Output of the last highway layer.
    """
    # The parameter is spelled `input__`; the loop previously read an
    # undefined `input_`, so the running value gets its own name here.
    current = input__
    with tf.variable_scope(scope):
      for idx in range(num_layers):
        g = f(linear(current, size, scope='highway_lin_%d' % idx))

        # t is the transform gate; (1 - t) is the carry gate.
        t = tf.sigmoid(linear(current, size, scope='highway_gate_%d' % idx) + bias)

        current = t * g + (1. - t) * current

    return current



#Coding the actual discriminator itself

class Discriminator(object):
    """Conv -> highway -> dense classifier that scores sequences as real/generated.

    The whole TF1 graph is built in the constructor.

    Args:
        sequence_length: number of time steps in every input sequence.
        data_size: accepted for interface compatibility; not used by the graph.
        l2_reg_lambda: weight of the L2 penalty on the final "scores" layer.
        num_classes: number of output classes (width of `input_y`).
        learning_rate: Adam step size used by `train_op`.

    Attributes:
        input_x, input_y, dropout_keep_prob: placeholders to feed.
        scores: un-normalised class logits.
        ypred_for_auc: softmax of `scores` (class probabilities).
        predictions: arg-max class index.
        loss, accuracy: training loss and batch accuracy.
        params: trainable variables whose name contains "discriminator".
        train_op: one Adam step on `loss` over `params`.
    """

    def __init__(self, sequence_length, data_size, l2_reg_lambda=0.0, num_classes=2, learning_rate=1e-4):
        # float32 so real-valued generator samples are not truncated when fed;
        # integer arrays are still accepted and converted on feed.
        self.input_x = tf.placeholder(tf.float32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        with tf.variable_scope("discriminator"):

            def conv_layer(inputs, filters, kernel_size, name):
                return tf.layers.conv2d(
                    inputs=inputs,
                    filters=filters,
                    kernel_size=kernel_size,
                    padding='SAME',
                    activation=tf.nn.relu,
                    name=name
                )

            def dense_layer(inputs, units, name, activation=tf.nn.relu):
                return tf.layers.dense(
                    inputs=inputs,
                    units=units,
                    activation=activation,
                    name=name
                )

            def conv_pool_layer(inputs, filters, kernel_size, pool_size, name):
                with tf.name_scope(name):
                    # Convolutional Layer
                    conv = tf.layers.conv2d(
                        inputs=inputs,
                        filters=filters,
                        kernel_size=kernel_size,
                        padding='SAME',
                        activation=tf.nn.relu,
                        name=f"{name}_conv"
                    )

                    # Pooling Layer
                    pool = tf.layers.max_pooling2d(
                        inputs=conv,
                        pool_size=pool_size,
                        strides=pool_size,
                        name=f"{name}_pool"
                    )

                return pool

            # conv2d needs a rank-4 [batch, height, width, channels] input:
            # treat the sequence as a (sequence_length x 1) single-channel image.
            conv_input = tf.expand_dims(tf.expand_dims(self.input_x, -1), -1)

            # Two convolutional layers
            conv1 = conv_layer(conv_input, filters=64, kernel_size=3, name="conv1")
            conv2 = conv_layer(conv1, filters=32, kernel_size=3, name="conv2")

            # Flatten the output from conv2
            flattened = tf.layers.flatten(conv2)

            # Highway layers (same width in and out)
            highway_output = highway(flattened, flattened.get_shape().as_list()[1], num_layers=2)

            # Dropout
            highway_output_dropout = tf.nn.dropout(highway_output, self.dropout_keep_prob)

            # Six fully connected layers with ReLU activation and dropout
            fc1 = dense_layer(highway_output_dropout, units=1024, name="fc1")
            fc1_dropout = tf.nn.dropout(fc1, self.dropout_keep_prob)
            fc2 = dense_layer(fc1_dropout, units=512, name="fc2")
            fc2_dropout = tf.nn.dropout(fc2, self.dropout_keep_prob)
            fc3 = dense_layer(fc2_dropout, units=256, name="fc3")
            fc3_dropout = tf.nn.dropout(fc3, self.dropout_keep_prob)
            fc4 = dense_layer(fc3_dropout, units=128, name="fc4")
            fc4_dropout = tf.nn.dropout(fc4, self.dropout_keep_prob)
            fc5 = dense_layer(fc4_dropout, units=64, name="fc5")
            fc5_dropout = tf.nn.dropout(fc5, self.dropout_keep_prob)
            fc6 = dense_layer(fc5_dropout, units=32, name="fc6")
            fc6_dropout = tf.nn.dropout(fc6, self.dropout_keep_prob)

            # Reshape fc6 to [batch, 1, 1, 32] for conv_pool_layer
            fc6_reshaped = tf.expand_dims(tf.expand_dims(fc6_dropout, 1), 1)

            # Creating a pooled convolutional layer
            pooled_conv = conv_pool_layer(
                inputs=fc6_reshaped,
                filters=64,
                kernel_size=[1, 1],
                pool_size=[1, 1],
                name="pooled_conv_1"
            )

            # Final scores and predictions
            with tf.name_scope("output"):
                self.scores = tf.layers.dense(tf.layers.flatten(pooled_conv), num_classes, name="scores")
                self.ypred_for_auc = tf.nn.softmax(self.scores)
                self.predictions = tf.argmax(self.scores, 1, name="predictions")

            # l2 regularization for overfitting correction, applied to the
            # kernel and bias of the "scores" layer so l2_reg_lambda has an effect.
            scores_scope = tf.get_variable_scope().name + "/scores"
            score_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scores_scope)
            if score_vars:
                l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in score_vars])
            else:
                l2_loss = tf.constant(0.0)

            # Calculate mean cross-entropy loss
            with tf.name_scope("loss"):
                losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
                self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

            # Accuracy
            with tf.name_scope("accuracy"):
                correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
                self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")

        # Training op over the discriminator's own variables only, so a
        # generator living in the same graph is left untouched.
        self.params = [v for v in tf.trainable_variables() if 'discriminator' in v.name]
        self.train_op = tf.train.AdamOptimizer(learning_rate).minimize(self.loss, var_list=self.params)




Discriminator Code

In [None]:
# @title Generator Code
# Generator code


import tensorflow as tf

from tensorflow.python.ops import tensor_array_ops, control_flow_ops


class Generator(object):
  """LSTM generator that emits one real-valued scalar per time step.

  The constructor builds, in TF1 graph mode:
    * `gen_x`: a free-running sample of shape [batch_size, sequence_length];
    * `pretrain_loss` / `pretrain_updates`: teacher-forced mean squared error
      against `x` and one clipped Adam step on it;
    * `g_loss` / `g_updates`: reward-weighted squared error and one clipped
      Adam step on it.

  Args:
    num_data: stored on the instance; not used when building the graph.
    batch_size: fixed batch size of every placeholder.
    hidden_dim: LSTM hidden-state width.
    sequence_length: number of time steps generated and trained on.
    start_token: stored on the instance (read by ROLLOUT).
    learning_rate: Adam step size for both optimizers.
    reward_gamma: stored on the instance; not used when building the graph.
  """

  def __init__(self, num_data, batch_size, hidden_dim,sequence_length, start_token, learning_rate=0.01, reward_gamma=0.95):
    self.num_data = num_data
    self.batch_size = batch_size

    self.hidden_dim = hidden_dim
    self.sequence_length = sequence_length
    self.start_token = start_token
    self.learning_rate = learning_rate
    self.reward_gamma = reward_gamma
    self.g_params = []
    self.d_params = []
    self.temperature = 1.0
    self.grad_clip = 5.0

    # Width of the LSTM input and of the output projection. The sampling loop
    # starts from a [batch, 1] value and feeds every output straight back in
    # as the next input, so both widths have to be 1.
    self.emb_dim = 1
    self.num_emb = 1

    self.expected_reward = tf.Variable(tf.zeros([self.sequence_length]))

    with tf.variable_scope("generator"):
      self.g_recurrent_unit = self.create_recurrent_unit(self.g_params)
      self.g_output_unit = self.create_output_unit(self.g_params)

    # float32 (like ROLLOUT.x) so real-valued samples can be fed back without
    # truncation; integer arrays are still accepted and converted on feed.
    self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.sequence_length])
    self.rewards = tf.placeholder(tf.float32, shape=[self.batch_size, self.sequence_length])

    self.processed_x = self.x

    # Time-major copy of x with an explicit feature axis: seq x batch x 1.
    x_time_major = tf.transpose(tf.expand_dims(self.x, -1), perm=[1, 0, 2])

    # Initial LSTM state: stacked [hidden_state, cell_state], both zero.
    self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
    self.h0 = tf.stack([self.h0, self.h0])

    self.gen_o = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length,
                                         dynamic_size=False, infer_shape=True)

    # float32: the loop stores real-valued outputs, not token ids.
    self.gen_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True)

    def _g_recurrence(i, x_t, h_tm1, gen_x):
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      next_output = self.g_output_unit(h_t)  # batch x 1, actual data (not logits)
      # Store one scalar per sequence so the stacked result is seq x batch.
      gen_x = gen_x.write(i, tf.reshape(next_output, [self.batch_size]))
      return i + 1, next_output, h_t, gen_x

    # Random first input for every sequence.
    initial_value = tf.random.normal([self.batch_size, 1])

    _, _, _, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3: i < self.sequence_length,
        body=_g_recurrence,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_value, self.h0, self.gen_x))
    self.gen_x = self.gen_x.stack()  # seq_length x batch_size
    self.gen_x = tf.transpose(self.gen_x, perm=[1, 0])  # batch_size x seq_length

    # Supervised Pretraining for Generator

    g_predictions = tensor_array_ops.TensorArray(dtype= tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True)
    seq_array = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length)
    ta_emb_x = seq_array.unstack(x_time_major)  # one [batch, 1] entry per time step

    def _pretrain_recurrence(i, x_t, h_tm1, g_predictions):
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      o_t = self.g_output_unit(h_t)  # prediction for x[:, i]
      g_predictions = g_predictions.write(i, o_t)
      x_tp1 = ta_emb_x.read(i)  # teacher forcing: feed the true value next
      return i + 1, x_tp1, h_t, g_predictions

    # Step 0 is seeded like the sampling loop, so the prediction for x[:, 0]
    # never sees x[:, 0] itself.
    # NOTE(review): confirm random seeding (rather than start_token) is the
    # intended conditioning for pre-training.
    _, _, _, self.g_predictions = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3: i < self.sequence_length,
        body=_pretrain_recurrence,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_value, self.h0, g_predictions))

    self.g_predictions = tf.transpose(self.g_predictions.stack(), perm=[1, 0, 2])  # batch x seq x 1

    # Per-step squared error, batch x seq. The reshape drops the trailing
    # feature axis so the subtraction does not broadcast to batch x seq x seq.
    squared_error = tf.square(
        tf.reshape(self.g_predictions, [self.batch_size, self.sequence_length]) - self.x)

    # Mean squared error, minimised as-is (a negated MSE would be maximised).
    self.pretrain_loss = tf.reduce_mean(squared_error)
    pretrain_opt = self.g_optimizer(self.learning_rate)
    self.pretrain_grad, _ = tf.clip_by_global_norm(tf.gradients(self.pretrain_loss, self.g_params), self.grad_clip)
    self.pretrain_updates = pretrain_opt.apply_gradients(zip(self.pretrain_grad, self.g_params))

    #######################################################################################################
    #  Unsupervised Training
    #######################################################################################################
    # Each step's squared error is weighted by that step's reward.
    # NOTE(review): the disabled draft multiplied the scalar MSE by the
    # rewards; per-step weighting is used here - confirm the intended objective.
    self.g_loss = tf.reduce_sum(squared_error * self.rewards)
    g_opt = self.g_optimizer(self.learning_rate)

    self.g_grad, _ = tf.clip_by_global_norm(tf.gradients(self.g_loss, self.g_params), self.grad_clip)
    self.g_updates = g_opt.apply_gradients(zip(self.g_grad, self.g_params))

  def generate(self, sess):
    """Run the sampling loop once; returns a [batch_size, sequence_length] array."""
    outputs = sess.run(self.gen_x)
    return outputs

  def pretrain_step(self, sess, x):
    """Apply one pre-training update on batch `x`; returns [update result, loss]."""
    outputs = sess.run([self.pretrain_updates, self.pretrain_loss], feed_dict={self.x: x})
    return outputs

  def init_matrix(self, shape):
    """Normal(0, 0.1) initial value of the given shape."""
    return tf.random_normal(shape, stddev=0.1)

  def init_vector(self, shape):
    """All-zero initial value of the given shape."""
    return tf.zeros(shape)

  def create_recurrent_unit(self, params):
    """Create the LSTM weights, append them to `params`, return the step function."""
    # Weights and Bias for input and hidden tensor
    self.Wi = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
    self.Ui = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
    self.bi = tf.Variable(self.init_matrix([self.hidden_dim]))

    self.Wf = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
    self.Uf = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
    self.bf = tf.Variable(self.init_matrix([self.hidden_dim]))

    self.Wog = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
    self.Uog = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
    self.bog = tf.Variable(self.init_matrix([self.hidden_dim]))

    self.Wc = tf.Variable(self.init_matrix([self.emb_dim, self.hidden_dim]))
    self.Uc = tf.Variable(self.init_matrix([self.hidden_dim, self.hidden_dim]))
    self.bc = tf.Variable(self.init_matrix([self.hidden_dim]))
    params.extend([
        self.Wi, self.Ui, self.bi,
        self.Wf, self.Uf, self.bf,
        self.Wog, self.Uog, self.bog,
        self.Wc, self.Uc, self.bc])

    # Plain closure over `self`: it is called as unit(x, state), so it must
    # not declare its own `self` parameter.
    def unit(x, hidden_memory_tm1):
      previous_hidden_state, c_prev = tf.unstack(hidden_memory_tm1)

      # Input Gate
      i = tf.sigmoid(
          tf.matmul(x, self.Wi) +
          tf.matmul(previous_hidden_state, self.Ui) + self.bi
      )

      # Forget Gate
      f = tf.sigmoid(
          tf.matmul(x, self.Wf) +
          tf.matmul(previous_hidden_state, self.Uf) + self.bf
      )

      # Output Gate
      o = tf.sigmoid(
          tf.matmul(x, self.Wog) +
          tf.matmul(previous_hidden_state, self.Uog) + self.bog
      )

      # New Memory Cell
      c_ = tf.nn.tanh(
          tf.matmul(x, self.Wc) +
          tf.matmul(previous_hidden_state, self.Uc) + self.bc
      )

      # Final Memory cell
      c = f * c_prev + i * c_

      # Current Hidden state
      current_hidden_state = o * tf.nn.tanh(c)

      return tf.stack([current_hidden_state, c])

    return unit

  def create_output_unit(self, params):
    """Create the output projection, append it to `params`, return the function."""
    self.Wo = tf.Variable(self.init_matrix([self.hidden_dim, self.num_emb]))
    self.bo = tf.Variable(self.init_matrix([self.num_emb]))
    params.extend([self.Wo, self.bo])

    def unit(hidden_memory_tuple):
      hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
      # hidden_state : batch x hidden_dim
      logits = tf.matmul(hidden_state, self.Wo) + self.bo
      return logits

    return unit

  def g_optimizer(self, *args, **kwargs):
    """Build the Adam optimizer used for both generator updates."""
    return tf.train.AdamOptimizer(*args, **kwargs)

In [None]:
# @title Oracle Model Code


TODO: a different oracle model still needs to be found to generate the mock data here.

In [None]:
# @title Rollout/ Reinforcement Learning Framework

class ROLLOUT(object):
  """Roll-out policy used to estimate per-step rewards for the generator.

  For a given prefix length `given_num`, the first `given_num` values of each
  input sequence are copied and the remaining steps are generated with the
  generator's LSTM. The discriminator's score of the completed sequences is
  the reward.

  Args:
    lstm: the Generator whose weights and hyper-parameters are mirrored.
    update_rate: mixing factor used by the update_* methods.
  """

  def __init__(self, lstm, update_rate):
    self.lstm = lstm
    self.update_rate = update_rate
    self.batch_size = self.lstm.batch_size
    self.hidden_dim = self.lstm.hidden_dim
    self.sequence_length = self.lstm.sequence_length
    self.start_token = tf.identity(self.lstm.start_token)
    self.learning_rate = self.lstm.learning_rate
    self.g_recurrent_unit = self.create_recurrent_unit()
    self.g_output_unit = self.create_output_unit()

    self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.sequence_length])
    self.given_num = tf.placeholder(tf.int32)

    # One [batch, 1] entry per time step: unstack splits along axis 0, so the
    # batch-major placeholder is made time-major (seq x batch x 1) first.
    ta_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True)
    ta_x = ta_x.unstack(tf.transpose(tf.expand_dims(self.x, -1), perm=[1, 0, 2]))

    # Initial LSTM state: stacked [hidden_state, cell_state], both zero.
    self.h0 = tf.zeros([self.batch_size, self.hidden_dim])
    self.h0 = tf.stack([self.h0, self.h0])

    gen_x = tensor_array_ops.TensorArray(dtype=tf.float32, size=self.sequence_length, dynamic_size=False, infer_shape=True)

    # While i < given_num: copy the provided value and feed it as next input.
    def _g_recurrence_1(i, x_t, h_tm1, given_num, gen_x):
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      x_tp1 = ta_x.read(i)  # batch x 1
      gen_x = gen_x.write(i, tf.reshape(x_tp1, [self.batch_size]))
      return i + 1, x_tp1, h_t, given_num, gen_x

    # From given_num on: roll out, feeding each output back as the next input.
    def _g_recurrence_2(i, x_t, h_tm1, given_num, gen_x):
      h_t = self.g_recurrent_unit(x_t, h_tm1)
      y_t = self.g_output_unit(h_t)  # batch x 1

      x_tp1 = y_t

      gen_x = gen_x.write(i, tf.reshape(y_t, [self.batch_size]))  # indices, batch_size
      return i + 1, x_tp1, h_t, given_num, gen_x

    # Seed step 0 the same way the generator's sampling loop does, so the
    # hidden state after the prefix matches free-running generation.
    # NOTE(review): the noise that produced the fed samples is not available
    # here, so fresh noise is drawn - confirm this is acceptable.
    initial_value = tf.random.normal([self.batch_size, 1])

    i, x_t, h_tm1, given_num, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, given_num, _4: i < given_num,
        body=_g_recurrence_1,
        loop_vars=(tf.constant(0, dtype=tf.int32), initial_value, self.h0, self.given_num, gen_x))

    _, _, _, _, self.gen_x = control_flow_ops.while_loop(
        cond=lambda i, _1, _2, _3, _4: i < self.sequence_length,
        body=_g_recurrence_2,
        loop_vars=(i, x_t, h_tm1, given_num, self.gen_x))
    self.gen_x = self.gen_x.stack()  # seq_length x batch_size
    self.gen_x = tf.transpose(self.gen_x, perm=[1, 0])  # batch_size x seq_length

  ################################### MAIN REWARD FUNCTION AND MAIN PART OF THE REINFORCEMENT LEARNING FRAMEWORK
  def get_reward(self, sess, input_x, rollout_num, discriminator):
    """Estimate a reward for every time step of every sequence in `input_x`.

    For each prefix length 1..sequence_length-1 the sequences are completed by
    roll-out `rollout_num` times and scored by the discriminator; the last
    step is scored on `input_x` itself. Scores are column 1 of
    `discriminator.ypred_for_auc`, averaged over the roll-outs.

    Returns:
      Array of shape [batch_size, sequence_length].
    """
    rewards = []

    for i in range(rollout_num):
      for given_num in range(1, self.sequence_length):
        feed = {self.x: input_x, self.given_num: given_num}
        samples = sess.run(self.gen_x, feed_dict=feed)
        feed = {discriminator.input_x: samples, discriminator.dropout_keep_prob:1.0}
        ypred_for_auc = sess.run(discriminator.ypred_for_auc, feed)
        ypred = np.array([item[1] for item in ypred_for_auc])
        if i == 0:
          rewards.append(ypred)
        else:
          rewards[given_num-1] += ypred

      # The last step needs no roll-out: score the complete sequences.
      feed = {discriminator.input_x: input_x, discriminator.dropout_keep_prob:1.0}
      ypred_for_auc = sess.run(discriminator.ypred_for_auc, feed)
      ypred = np.array([item[1] for item in ypred_for_auc])
      if i == 0:
        rewards.append(ypred)
      else:
        rewards[self.sequence_length - 1] += ypred

    rewards = np.transpose(np.array(rewards))/ (1.0 * rollout_num)
    return rewards

  def _build_recurrent_unit(self):
    """Return an LSTM step function reading the weights currently on `self`."""
    def unit(x, hidden_memory_tm1):
      previous_hidden_state, c_prev = tf.unstack(hidden_memory_tm1)

      # Input Gate
      i = tf.sigmoid(
          tf.matmul(x, self.Wi) +
          tf.matmul(previous_hidden_state, self.Ui) + self.bi
      )

      # Forget Gate
      f = tf.sigmoid(
          tf.matmul(x, self.Wf) +
          tf.matmul(previous_hidden_state, self.Uf) + self.bf
      )

      # Output Gate
      o = tf.sigmoid(
          tf.matmul(x, self.Wog) +
          tf.matmul(previous_hidden_state, self.Uog) + self.bog
      )

      # New Memory Cell
      c_ = tf.nn.tanh(
          tf.matmul(x, self.Wc) +
          tf.matmul(previous_hidden_state, self.Uc) + self.bc
      )

      # Final Memory cell
      c = f * c_prev + i * c_

      # Current Hidden state
      current_hidden_state = o * tf.nn.tanh(c)

      return tf.stack([current_hidden_state, c])

    return unit

  def _build_output_unit(self):
    """Return the output projection reading the weights currently on `self`."""
    def unit(hidden_memory_tuple):
      hidden_state, c_prev = tf.unstack(hidden_memory_tuple)
      # hidden_state : batch x hidden_dim
      logits = tf.matmul(hidden_state, self.Wo) + self.bo
      return logits

    return unit

  def create_recurrent_unit(self):
    """Mirror the generator's LSTM weights and return the step function."""
    # Weights and Bias for input and hidden tensor
    self.Wi = tf.identity(self.lstm.Wi)
    self.Ui = tf.identity(self.lstm.Ui)
    self.bi = tf.identity(self.lstm.bi)

    self.Wf = tf.identity(self.lstm.Wf)
    self.Uf = tf.identity(self.lstm.Uf)
    self.bf = tf.identity(self.lstm.bf)

    self.Wog = tf.identity(self.lstm.Wog)
    self.Uog = tf.identity(self.lstm.Uog)
    self.bog = tf.identity(self.lstm.bog)

    self.Wc = tf.identity(self.lstm.Wc)
    self.Uc = tf.identity(self.lstm.Uc)
    self.bc = tf.identity(self.lstm.bc)

    return self._build_recurrent_unit()

  def update_recurrent_unit(self):
    """Rebind the LSTM weights to a blend of the old tensors and the generator's.

    NOTE(review): the weights are tensors derived from the generator's
    variables, not variables themselves, and `gen_x` was built from the
    tensors that existed in __init__; confirm `update_rate` has the intended
    effect on roll-outs.
    """
    # Weights and Bias for input and hidden tensor
    self.Wi = self.update_rate * self.Wi + (1 - self.update_rate) * tf.identity(self.lstm.Wi)
    self.Ui = self.update_rate * self.Ui + (1 - self.update_rate) * tf.identity(self.lstm.Ui)
    self.bi = self.update_rate * self.bi + (1 - self.update_rate) * tf.identity(self.lstm.bi)

    self.Wf = self.update_rate * self.Wf + (1 - self.update_rate) * tf.identity(self.lstm.Wf)
    self.Uf = self.update_rate * self.Uf + (1 - self.update_rate) * tf.identity(self.lstm.Uf)
    self.bf = self.update_rate * self.bf + (1 - self.update_rate) * tf.identity(self.lstm.bf)

    self.Wog = self.update_rate * self.Wog + (1 - self.update_rate) * tf.identity(self.lstm.Wog)
    self.Uog = self.update_rate * self.Uog + (1 - self.update_rate) * tf.identity(self.lstm.Uog)
    self.bog = self.update_rate * self.bog + (1 - self.update_rate) * tf.identity(self.lstm.bog)

    self.Wc = self.update_rate * self.Wc + (1 - self.update_rate) * tf.identity(self.lstm.Wc)
    self.Uc = self.update_rate * self.Uc + (1 - self.update_rate) * tf.identity(self.lstm.Uc)
    self.bc = self.update_rate * self.bc + (1 - self.update_rate) * tf.identity(self.lstm.bc)

    return self._build_recurrent_unit()

  def create_output_unit(self):
    """Mirror the generator's output projection and return the function."""
    self.Wo = tf.identity(self.lstm.Wo)
    self.bo = tf.identity(self.lstm.bo)

    return self._build_output_unit()

  def update_output_unit(self):
    """Rebind the output weights to a blend of the old tensors and the generator's."""
    self.Wo = self.update_rate * self.Wo + (1 - self.update_rate) * tf.identity(self.lstm.Wo)
    self.bo = self.update_rate * self.bo + (1 - self.update_rate) * tf.identity(self.lstm.bo)

    return self._build_output_unit()

  def update_params(self):
    """Refresh the mirrored weights from the generator."""
    # Only mirrored when the generator actually has an embedding table.
    if hasattr(self.lstm, "g_embeddings"):
      self.g_embeddings = tf.identity(self.lstm.g_embeddings)
    self.g_recurrent_unit = self.update_recurrent_unit()
    self.g_output_unit = self.update_output_unit()


In [None]:
# @title Generator Hyperparameters
#Hyperparameters

# Reproducibility
SEED = 88  # seed for the Python and NumPy random generators

# Sequence layout
START_TOKEN = 0
SEQ_LENGTH = 20  # sequence length
BATCH_SIZE = 64

# Generator model / schedule
HIDDEN_DIM = 32  # hidden state dimension of lstm cell
PRE_EPOCH_NUM = 120  # supervise (maximum likelihood estimation) epochs





In [None]:
# @title Discriminator Hyperparameters

dis_batch_size = 64  # discriminator batch size
dis_l2_reg_lambda = 0.2  # L2 regularisation weight
dis_dropout_keep_prob = 0.75  # keep probability fed to the dropout layers while training


In [None]:
# @title General Hyperparameters and Directories

# Schedule
TOTAL_BATCH = 200  # number of adversarial training iterations
generated_num = 10000  # number of sequences requested per generate_samples call

# Data files
positive_file = 'save/real_data.txt'  # real sequences
negative_file = 'save/generator_sample.txt'  # generator samples for the discriminator
eval_file = 'save/eval_file.txt'  # generator samples used for evaluation


In [None]:
# @title Main Model Code

def generate_samples(sess, trainable_model, batch_size, generated_num, output_file):
  """Sample sequences from `trainable_model` and write them to `output_file`.

  Runs `generated_num // batch_size` calls to `trainable_model.generate(sess)`
  and writes one sequence per line, values separated by single spaces. The
  data loaders split lines on whitespace, so a comma separator would make the
  file unreadable for them.
  """
  generated_samples = []

  for _ in range(int(generated_num // batch_size)):
    generated_samples.extend(trainable_model.generate(sess))

  with open(output_file, 'w') as fout:
    for sequence in generated_samples:
      fout.write(' '.join(str(x) for x in sequence) + '\n')

# Code to test the distributional loss against the standard model.

def target_loss(sess, target_lstm, data_loader):
    """Average `target_lstm.pretrain_loss` over every batch of `data_loader`.

    The loader is rewound first; each batch is fed through `target_lstm.x`.
    """
    data_loader.reset_pointer()

    batch_losses = [
        sess.run(target_lstm.pretrain_loss, {target_lstm.x: data_loader.next_batch()})
        for _ in range(data_loader.num_batch)
    ]

    return np.mean(batch_losses)


# Code for each pretraining epoch of the model

def pre_train_epoch(sess, trainable_model, data_loader):
  """Run one supervised pre-training pass over every batch of `data_loader`.

  Returns:
    Mean of the per-batch losses reported by `trainable_model.pretrain_step`,
    or NaN when the loader holds no batches.
  """
  supervised_g_losses = []
  data_loader.reset_pointer()
  # num_batch is an integer attribute of the loaders, not a method.
  for it in range(data_loader.num_batch):
    batch = data_loader.next_batch()
    _, g_loss = trainable_model.pretrain_step(sess, batch)
    supervised_g_losses.append(g_loss)

  if not supervised_g_losses:
    return float('nan')
  return np.mean(supervised_g_losses)

def main(target_lstm=None):
  """Pre-train the generator and discriminator, then train them adversarially.

  Args:
    target_lstm: optional oracle model exposing `pretrain_loss` and `x`, used
      by `target_loss` to score generated samples. The notebook does not
      define one yet; when it is None the evaluation loss is logged as NaN.
  """
  random.seed(SEED)
  np.random.seed(SEED)
  assert START_TOKEN == 0

  gen_data_loader = gen_Data_loader(BATCH_SIZE)
  likelihood_data_loader = gen_Data_loader(BATCH_SIZE)
  dis_data_loader = Dis_dataloader(BATCH_SIZE)

  # NOTE(review): 1000 is carried over from the previous Generator(...) call;
  # neither model uses the value when building its graph.
  num_data = 1000
  # The generator's placeholders have a fixed batch size, so it must match
  # the batch size of the loaders that feed it.
  generator = Generator(num_data, BATCH_SIZE, HIDDEN_DIM, SEQ_LENGTH, START_TOKEN)
  ################ CODE FOR THE TARGET MODEL WE MUST BE ABLE TO GET
  ################################################################
  # NOTE(review): 0.4 is kept from the previous call; the hyperparameter cell
  # declares dis_l2_reg_lambda = 0.2 - confirm which value is intended.
  discriminator = Discriminator(sequence_length=SEQ_LENGTH, data_size=num_data, l2_reg_lambda=0.4)

  config = tf.ConfigProto()
  config.gpu_options.allow_growth = True

  def evaluate(sess):
    """Write fresh samples to eval_file and score them with the oracle, if any."""
    generate_samples(sess, generator, BATCH_SIZE, generated_num, eval_file)
    if target_lstm is None:
      return float('nan')
    likelihood_data_loader.create_batches(eval_file)
    return target_loss(sess, target_lstm, likelihood_data_loader)

  def train_discriminator(sess, rounds):
    """`rounds` times: resample negatives, then 3 passes over the mixed data."""
    for _ in range(rounds):
      generate_samples(sess, generator, BATCH_SIZE, generated_num, negative_file)
      dis_data_loader.load_train_data(positive_file, negative_file)

      for _ in range(3):
        dis_data_loader.reset_pointer()
        for it in range(dis_data_loader.num_batch):
          x_batch, y_batch = dis_data_loader.next_batch()
          feed = {
              discriminator.input_x: x_batch,
              discriminator.input_y: y_batch,
              discriminator.dropout_keep_prob: dis_dropout_keep_prob
          }
          _ = sess.run(discriminator.train_op, feed)

  # The real data has to be batched before the generator can be pre-trained.
  gen_data_loader.create_batches(positive_file)

  # Both context managers guarantee that the session and the log file are
  # released exactly once, after the last training step.
  with tf.Session(config=config) as sess, open('save/experiment-log.txt', 'w') as log:
    sess.run(tf.global_variables_initializer())

    #################################### Pre Training of the Generator
    print("Pre Training Generator")

    for epoch in range(PRE_EPOCH_NUM):
      loss = pre_train_epoch(sess, generator, gen_data_loader)
      if epoch % 5 == 0:
        test_loss = evaluate(sess)
        print('pre-train epoch: ', epoch, 'train_loss: ', loss, 'test_loss: ', test_loss)
        buffer = 'epoch:\t' + str(epoch) + '\tnll:\t' + str(test_loss) + '\n'
        log.write(buffer)

    ######################################## Pre Training of the Discriminator
    print("Pre Training Discriminator")

    train_discriminator(sess, 50)

    # Built once, after pre-training.
    rollout = ROLLOUT(generator, 0.8)

    ################################################################# Adversarial Training
    log.write('adversarial training...\n')

    for total_batch in range(TOTAL_BATCH):
      # Generator step
      for it in range(1):
        samples = generator.generate(sess)
        rewards = rollout.get_reward(sess, samples, 16, discriminator)
        feed = {generator.x: samples, generator.rewards: rewards}
        _ = sess.run(generator.g_updates, feed_dict=feed)

      if total_batch % 5 == 0 or total_batch == TOTAL_BATCH - 1:
        test_loss = evaluate(sess)
        buffer = 'epoch:\t' + str(total_batch) + '\tnll:\t' + str(test_loss) + '\n'
        print('total_batch: ', total_batch, 'test_loss: ', test_loss)
        log.write(buffer)

      rollout.update_params()

      # Discriminator steps
      train_discriminator(sess, 5)


#    if __name__ == '__main__':
#      main()










