In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
import os
import sys
import random
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

Instructions for updating:
non-resource variables are not supported in the long term


<IPython.core.display.Javascript object>

In [3]:
class MLP:
    def __init__(self, vocab_size, hidden_size):
        self._vocab_size = vocab_size
        self._hidden_size = hidden_size

    def build_graph(self):
        self._X = tf.placeholder(tf.float32, shape=[None, self._vocab_size])
        self._real_Y = tf.placeholder(
            tf.int32,
            shape=[
                None,
            ],
        )
        weights_1 = tf.get_variable(
            name="weight_input_hidden",
            shape=(self._vocab_size, self._hidden_size),
            initializer=tf.random_normal_initializer(seed=2021),
        )
        biases_1 = tf.get_variable(
            name="biases_input_hidden",
            shape=(self._hidden_size),
            initializer=tf.random_normal_initializer(seed=2021),
        )

        weights_2 = tf.get_variable(
            name="weights_hidden_output",
            shape=(self._hidden_size, NUM_CLASSES),
            initializer=tf.random_normal_initializer(seed=2021),
        )

        biases_2 = tf.get_variable(
            name="biases_hidden_output",
            shape=(NUM_CLASSES),
            initializer=tf.random_normal_initializer(seed=2021),
        )

        hidden = tf.matmul(self._X, weights_1) + biases_1  # Net input
        hidden = tf.sigmoid(hidden)  # activation function.

        logits = tf.matmul(hidden, weights_2) + biases_2

        labels_one_hot = tf.one_hot(
            indices=self._real_Y, depth=NUM_CLASSES, dtype=tf.float32
        )
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=labels_one_hot, logits=logits
        )
        loss = tf.reduce_mean(loss)
        probs = tf.nn.softmax(logits)  # difference between 2 distributions.
        predicted_labels = tf.argmax(probs, axis=1)
        predicted_labels = tf.squeeze(predicted_labels)
        return predicted_labels, loss

    def trainer(self, loss, learning_rate):
        train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)
        return train_op

<IPython.core.display.Javascript object>

In [4]:
class DataReader:
    def __init__(self, data_path, batch_size, vocab_size):
        self._batch_size = batch_size
        with open(data_path) as f:
            d_lines = f.read().splitlines()

        self._data = []
        self._labels = []
        for data_id, line in enumerate(d_lines):
            vector = [0.0 for _ in range(vocab_size)]
            features = line.split("<fff>")
            label, doc_id = int(features[0]), int(features[1])
            tokens = features[2].split()
            for token in tokens:
                index, value = int(token.split(":")[0]), float(token.split(":")[1])
                vector[index] = value
            self._data.append(vector)
            self._labels.append(label)

        self._data = np.array(self._data)
        self._labels = np.array(self._labels)
        self._num_epoch = 0
        self._batch_id = 0

    def next_batch(self):
        start = self._batch_id * self._batch_size
        end = start + self._batch_size
        self._batch_id += 1

        if end + self._batch_size > len(self._data):
            end = len(self._data)
            self._num_epoch += 1
            self._batch_id = 0
            indices = list(range(len(self._data)))
            random.seed(2021)
            random.shuffle(indices)
            self._data, self._labels = self._data[indices], self._labels[indices]
        return self._data[start:end], self._labels[start:end]

<IPython.core.display.Javascript object>

In [5]:
def load_dataset(vocab_size):
    train_data_reader = DataReader(
        data_path=os.getcwd() + "/20news-bydate/20news-train-tf-idf.txt",
        batch_size=50,
        vocab_size=vocab_size,
    )

    test_data_reader = DataReader(
        data_path=os.getcwd() + "/20news-bydate/20news-test-tf-idf.txt",
        batch_size=50,
        vocab_size=vocab_size,
    )

    return train_data_reader, test_data_reader

<IPython.core.display.Javascript object>

In [6]:
def save_parameters(name, value, epoch):
    filename = name.replace(":", "-colon-") + "-epoch-{}.txt".format(epoch)
    if len(value.shape) == 1:
        string_form = ",".join([str(number) for number in value])
    else:
        string_form = "\n".join(
            [
                ",".join([str(number) for number in value[row]])
                for row in range(value.shape[0])
            ]
        )
    with open(os.getcwd() + "/saved-paras/" + filename, "w") as f:
        f.write(string_form)

<IPython.core.display.Javascript object>

In [7]:
def restore_parameters(name, epoch):
    filename = name.replace(":", "-colon-") + "-epoch-{}.txt".format(epoch)
    with open(os.getcwd() + "/saved-paras/" + filename, "r") as f:
        lines = f.read().splitlines()
    if len(lines) == 1:
        value = [float(number) for number in lines[0].split(",")]
    else:
        value = [
            [float(number) for number in lines[row].split(",")]
            for row in range(len(lines))
        ]
    return value

<IPython.core.display.Javascript object>

In [8]:
NUM_CLASSES = 20
with open(os.getcwd() + '/20news-bydate/20news-full-words-idfs.txt') as f:
    vocab_size = len(f.read().splitlines())
mlp = MLP(vocab_size = vocab_size, hidden_size = 50)
predicted_labels, loss = mlp.build_graph()
train_op = mlp.trainer(loss = loss, learning_rate = 0.1)
with tf.Session() as sess:
    train_data_reader, test_data_reader = load_dataset(vocab_size)
    max_step = 500
    sess.run(tf.global_variables_initializer())
    for step in range(max_step):
        train_data, train_labels = train_data_reader.next_batch()
        labels_eval, loss_eval, _ = sess.run(
            [predicted_labels, loss, train_op],
            feed_dict={
                mlp._X: train_data,
                mlp._real_Y: train_labels
            }
        )
        print('step: {}, loss: {}'.format(step, loss_eval))
        if (train_data_reader._batch_id == 0):
            trainable_variables = tf.trainable_variables()
            for variable in trainable_variables:
                save_parameters(
                name = variable.name,
                value = variable.eval(),
                epoch = train_data_reader._num_epoch
            )
with tf.Session() as sess:
    epoch = train_data_reader._num_epoch
    trainable_variables = tf.trainable_variables()
    for variable in trainable_variables:
        saved_value = restore_parameters(variable.name,epoch)
        assign_op = variable.assign(saved_value)
        sess.run(assign_op)
    num_true_preds = 0
    while True:  
        test_data, test_labels = test_data_reader.next_batch()
        test_labels_eval = sess.run(
            predicted_labels,
            feed_dict = {
                mlp._X: test_data,
                mlp._real_Y: test_labels
            }
        )
        matches = np.equal(test_labels_eval, test_labels)
        num_true_preds += np.sum(matches.astype(float))
        if test_data_reader._batch_id == 0:
            break
    print('Epoch:', epoch)
    print('Accuracy on test data:', num_true_preds/len(test_data_reader._data))


2022-03-29 12:51:35.751559: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


step: 0, loss: 11.104410171508789
step: 1, loss: 0.9813809990882874
step: 2, loss: 0.0006089996313676238
step: 3, loss: 2.036018940998474e-06
step: 4, loss: 1.907348234908568e-08
step: 5, loss: 0.0
step: 6, loss: 0.0
step: 7, loss: 0.0
step: 8, loss: 0.0
step: 9, loss: 14.782851219177246
step: 10, loss: 35.11439514160156
step: 11, loss: 31.772096633911133
step: 12, loss: 26.245704650878906
step: 13, loss: 20.384763717651367
step: 14, loss: 12.772749900817871
step: 15, loss: 6.130094528198242
step: 16, loss: 0.990744948387146
step: 17, loss: 0.06624089926481247
step: 18, loss: 0.0001084230825654231
step: 19, loss: 1.053785354088177e-06
step: 20, loss: 0.0
step: 21, loss: 24.634183883666992
step: 22, loss: 32.63716506958008
step: 23, loss: 29.58038902282715
step: 24, loss: 26.681072235107422
step: 25, loss: 22.07748794555664
step: 26, loss: 17.576826095581055
step: 27, loss: 12.419458389282227
step: 28, loss: 8.495309829711914
step: 29, loss: 4.281155109405518
step: 30, loss: 1.077122688

step: 279, loss: 1.477145791053772
step: 280, loss: 1.3441356420516968
step: 281, loss: 1.2678834199905396
step: 282, loss: 1.2775131464004517
step: 283, loss: 1.7493427991867065
step: 284, loss: 0.9106053113937378
step: 285, loss: 1.5260062217712402
step: 286, loss: 1.3682018518447876
step: 287, loss: 1.4743714332580566
step: 288, loss: 1.2332533597946167
step: 289, loss: 1.3671822547912598
step: 290, loss: 1.5977027416229248
step: 291, loss: 1.6013449430465698
step: 292, loss: 1.0792049169540405
step: 293, loss: 1.3622686862945557
step: 294, loss: 1.2012332677841187
step: 295, loss: 1.1996619701385498
step: 296, loss: 1.0319435596466064
step: 297, loss: 1.0828254222869873
step: 298, loss: 1.460838794708252
step: 299, loss: 0.8668910264968872
step: 300, loss: 0.6195905804634094
step: 301, loss: 1.0267127752304077
step: 302, loss: 1.3063524961471558
step: 303, loss: 1.139296293258667
step: 304, loss: 1.0466418266296387
step: 305, loss: 0.9359961152076721
step: 306, loss: 0.821762979030

<IPython.core.display.Javascript object>