In [1]:
%tensorflow_version 1.x

# tf.disable_v2_behavior()
# from tensorflow.python.keras import Sequential
# from tensorflow.python.keras.layers import Dense, Lambda
# from keras.utils.np_utils import to_categorical
# import keras.backend as K

TensorFlow 1.x selected.


In [2]:
import tensorflow as tf
print(tf.__version__)

1.15.2


In [3]:
class MLP:
    def __init__(self, vocab_size, hidden_size, num_classes):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_classes = num_classes
    def build_graph(self):
        self.X = tf.placeholder(tf.float32, shape=[None, self.vocab_size])
        self.y_real = tf.placeholder(tf.int32, shape=[None,])
        weight_1 = tf.get_variable(name='weight_input_hidden', 
                         shape=(self.vocab_size, self.hidden_size),
                         initializer=tf.random_normal_initializer(seed=2018))
        biases_1 = tf.get_variable(name='biases_input_hidden', 
                               shape=(self.hidden_size),
                               initializer=tf.random_normal_initializer(seed=2018))
        weight_2 = tf.get_variable(name='weight_hidden_output', 
                         shape=(self.hidden_size, self.num_classes),
                         initializer=tf.random_normal_initializer(seed=2018))
        biases_2 = tf.get_variable(name='biases_hidden_output', 
                               shape=(self.num_classes),
                               initializer=tf.random_normal_initializer(seed=2018))
        
        hidden = tf.matmul(self.X, weight_1) + biases_1
        hidden = tf.sigmoid(hidden)

        logits = tf.matmul(hidden, weight_2) + biases_2

        labels_one_hot = tf.one_hot(indices=self.y_real, depth=self.num_classes, dtype=tf.float32)
        loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_one_hot, logits=logits)
   
        loss = tf.reduce_mean(loss)
        probs = tf.nn.softmax(logits)
        predicted_labels = tf.argmax(probs, axis=1)
        predicted_labels = tf.squeeze(predicted_labels)

        return predicted_labels, loss
      
    def trainer(self, loss, learning_rate):
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        return train_op


In [4]:
import numpy as np

class DataReader:
  def __init__(self, data_path, batch_size, vocab_size):
    self.batch_size = batch_size
    with open(data_path) as f:
      d_lines = f.read().splitlines()
    
    self.data = []
    self.labels = []
    for data_id, line in enumerate(d_lines):
      vector = [0. for i in range(vocab_size)]
      features = line.split('<fff>')
      label, doc_id = int(features[0]), int(features[1])
      tokens = features[2].split()
      for token in tokens:
        index, value = int(token.split(':')[0]), float(token.split(':')[1])
        vector[index] = value
      self.data.append(vector)
      self.labels.append(label)
    
    self.data = np.array(self.data)
    self.labels = np.array(self.labels)

    self.num_epoch = 0
    self.batch_id = 0
  
  def next_batch(self):
    start = self.batch_id * self.batch_size
    end = start + self.batch_size
    self.batch_id += 1

    if end + self.batch_size > len(self.data):
      end = len(self.data)
      self.num_epoch += 1
      self.batch_id = 0
      indices = np.arange(len(self.data))
      np.random.seed(2018)
      np.random.shuffle(indices)
      self.data, self.labels = self.data[indices], self.labels[indices]

    return self.data[start:end], self.labels[start:end]


In [5]:
with open('word_idfs.txt', 'rb') as f:
  vocab_size = len(f.read().splitlines())

mlp = MLP(vocab_size=vocab_size, hidden_size=50,num_classes=20)
predicted_labels, loss = mlp.build_graph()
train_op = mlp.trainer(loss=loss, learning_rate=0.1)


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [6]:
def load_datasets():
  train_data_reader = DataReader(data_path='20news-train-tfidf.txt', batch_size=50, vocab_size=vocab_size)
  test_data_reader = DataReader(data_path='20news-test-tfidf.txt', batch_size=50, vocab_size=vocab_size)
  return train_data_reader, test_data_reader  

def save_parameters(name, value, epoch):
  filename = name.replace(':', '-colon-') + '-epoch-{}.txt'.format(epoch)
  if len(value.shape) == 1:
    string_form = ','.join([str(number) for number in value])
  else:
    string_form = '\n'.join([','.join([str(number)
                                        for number in value[row]])
                                          for row in range(value.shape[0])])
    
  with open(filename, 'w') as f:
    f.write(string_form)

In [7]:
def restore_parameters(name, epoch):
  filename = name.replace(':', '-colon-') + '-epoch-{}.txt'.format(epoch)
  with open(filename) as f:
    lines = f.read().splitlines()
  if len(lines) == 1:
    value = [float(number) for number in lines[0].split(',')]
  else:
    value = [[float(number) for number in lines[row].split(',')]
              for row in range(len(lines))]
  return value

In [None]:
with tf.Session() as sess:
  train_data_reader, test_data_reader = load_datasets()
  step, MAX_STEP = 0 , 100**2

  sess.run(tf.global_variables_initializer())
  while step < MAX_STEP:
    train_data, train_labels = train_data_reader.next_batch()
    plabels_eval, loss_eval, _ = sess.run(
        [predicted_labels, loss, train_op],
        feed_dict={
            mlp.X: train_data,
            mlp.y_real: train_labels
        }
    )
    step+=1
    print('step: {}, loss: {}'.format(step, loss_eval))
  trainable_variables = tf.trainable_variables()
  for variable in trainable_variables:
    save_parameters(name=variable.name, value=variable.eval(), epoch=train_data_reader.num_epoch) 

In [11]:
with tf.Session() as sess:
  epoch = 44

  trainable_variables = tf.trainable_variables()
  for variable in trainable_variables:
    saved_value = restore_parameters(variable.name, epoch)
    assign_op = variable.assign(saved_value)
    sess.run(assign_op) 

  num_true_preds = 0
  while True:
    test_data, test_labels = test_data_reader.next_batch()
    test_plabels_eval = sess.run(predicted_labels,
                                 feed_dict={
                                     mlp.X: test_data,
                                     mlp.y_real: test_labels
                                 }
                                )
    matches = np.equal(test_plabels_eval, test_labels)
    num_true_preds += np.sum(matches.astype(float))

    if test_data_reader.batch_id == 0:
      break
    
  print('Epoch:', epoch)
  print('Accuracy on test data:', num_true_preds / len(test_data_reader.data))

Epoch: 44
Accuracy on test data: 0.7525225703664365
