In [3]:
%load_ext autoreload
%autoreload 2

!pip3 install --quiet tensorflow-gpu==1.13.1
!pip3 install --quiet tensorflow-hub
!pip3 install --quiet tf-sentencepiece

import tensorflow as tf
print("Version: ", tf.__version__)
print("Eager mode: ", tf.executing_eagerly())
print("GPU is", "available" if tf.test.is_gpu_available() else "NOT AVAILABLE")

import os, sys,datetime
from google.colab import drive
drive.mount('/content/gdrive')
gitDir = "/content/gdrive/My Drive/nlp/"
os.chdir(gitDir + "training/")
os.chdir(gitDir)
print(os.listdir("."))

#sys.path.insert(0,gitDir + "data")

#import training.train as t

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Version:  1.13.1
Eager mode:  False
GPU is available
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
['.git', 'README.md', 'data', 'presentations', '.idea', 'training', 'blobs', '.gitignore', 'serverblobs', 'template.py']


In [19]:
%%time


import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import tf_sentencepiece

from data import load_dataset
from tensorflow.keras.layers import *
from tensorflow.keras.models import *

import glob, time, datetime, os
import matplotlib.pyplot as plt
#from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm as tqdm

from sklearn.metrics import f1_score

# Run configuration handed to trainModel(); any key left out here falls back
# to the defaults that trainModel defines before applying these overrides.
params = {
    # dataset identifiers forwarded to load_dataset.getData
    "trainData": "US",
    "testData": "DE",
    "epochs": 1,
    "batchSize": 512,
    "optimizer": tf.train.AdamOptimizer(),
    "trainexamples": 1000 * 1,
    # first entry: `trainable` flag of the hub module; remaining entries
    # describe the layers stacked on top of it
    "architecture": [False],
    "f1modus": "micro",
    "savelog": False,
    "path": "blobs/" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "_myTitle/",
    # alternative settings kept for quick switching:
    # "path": "blobs/test/",
    # "pathToCache": "../data/",
}


class Model:
    """Classification head on top of the multilingual sentence-encoder hub module.

    The hub module embeds the text tensor ``data_X``; a configurable stack of
    layers followed by a dense layer with ``n_class`` units produces the
    logits. The graph is built once in the constructor.

    ``params["architecture"]`` is a list. Its first element is passed as the
    ``trainable`` flag of the hub module, the remaining elements describe the
    head in order:

    * ``"bn"``                 -> batch normalization
    * ``"relu"`` / ``"r"``     -> ReLU
    * ``"dropout"`` / ``"d"``  -> dropout
    * anything else            -> dense layer with that many units

    Graph handles exposed as attributes: ``logits``, ``loss``, ``train_op``,
    ``predictions``, ``labels``, ``accuracy`` and ``is_training``.

    ``is_training`` is a boolean placeholder that defaults to ``False``
    (inference behaviour). Feed ``True`` while running ``train_op`` so that
    dropout is applied and batch normalization uses batch statistics.
    """

    def __init__(self, data_X, data_Y, params):
        """Build the whole graph for the given input and label tensors.

        Args:
            data_X: tensor fed to the hub module.
            data_Y: sparse (integer) class labels.
            params: dict providing at least ``"architecture"`` and
                ``"optimizer"``.
        """
        self.params = params
        self.n_class = 39
        self.architecture = params["architecture"][1:]
        self.xling = hub.Module("https://tfhub.dev/google/universal-sentence-encoder-xling-many/1", trainable=params["architecture"][0])
        self.data_X = data_X
        self.data_Y = data_Y
        # One switch for all train/inference dependent layers. It must exist
        # before forward() is called by create_architecture().
        self.is_training = tf.placeholder_with_default(False, shape=(), name="is_training")
        self.create_architecture(data_X, data_Y)

    def create_architecture(self, data_X, data_Y):
        """Create loss, train op, predictions and accuracy for the inputs."""
        self.logits = self.forward(data_X)

        self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=data_Y, logits=self.logits))

        # Batch normalization registers its moving-average updates in
        # UPDATE_OPS; they only run if the train op depends on them.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = self.params["optimizer"].minimize(self.loss)

        self.predictions = tf.argmax(self.logits, 1)
        self.labels = data_Y

        # argmax yields int64, so the labels are cast before comparing.
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.predictions, tf.cast(data_Y, tf.int64)), tf.float32))

    def forward(self, X):
        """Return the logits for input tensor ``X``."""
        output = self.xling(X)

        for x in self.architecture:
            if x == "bn":
                # Previously hard-coded to training=True, which made
                # validation normalize with batch statistics.
                output = tf.layers.batch_normalization(output, training=self.is_training)
            elif x == "relu" or x == "r":
                output = tf.nn.relu(output)
            elif x == "dropout" or x == "d":
                # tf.layers.dropout defaults to training=False (identity), so
                # the flag has to be passed for dropout to have any effect.
                output = tf.layers.dropout(output, training=self.is_training)
            else:
                output = tf.layers.dense(output, x)

        output = tf.layers.dense(output, self.n_class, name="final_output_prediction")

        return output


def _run_pass(sess, model, iterator_init, params, training):
    """Run one full pass over the dataset selected by ``iterator_init``.

    Args:
        sess: session holding the initialized graph.
        model: ``Model`` instance whose tensors are evaluated.
        iterator_init: initializer op that points the shared iterator at the
            dataset to consume.
        params: run configuration; ``"trainexamples"`` sizes the progress bar
            and ``"batchSize"`` is its step.
        training: if true, ``model.train_op`` is run with every batch and the
            model is put in training mode; otherwise the pass only evaluates.

    Returns:
        Tuple ``(mean_loss, mean_accuracy, predictions, labels)`` where the
        means are taken over batches and the two lists hold the values of
        every example seen in this pass.

    Raises:
        ValueError: if the dataset did not yield a single batch.
    """
    fetches = [model.accuracy, model.loss, model.predictions, model.labels]
    if training:
        # Appended last so the metric values keep fixed positions.
        fetches.append(model.train_op)
    feed = {model.is_training: training}

    sess.run(iterator_init)

    loss_sum, acc_sum, batches = 0.0, 0.0, 0
    predictions, labels = [], []
    with tqdm(total=params["trainexamples"]) as pbar:
        while True:
            try:
                a, l, batch_predictions, batch_labels = sess.run(fetches, feed_dict=feed)[:4]
            except tf.errors.OutOfRangeError:
                # Regular end of the dataset.
                break

            loss_sum += l
            acc_sum += a
            pbar.set_postfix_str((l, a))
            pbar.update(params["batchSize"])

            predictions.extend(batch_predictions)
            labels.extend(batch_labels)
            batches += 1

    if batches == 0:
        raise ValueError(
            "dataset produced no batches; check that params['trainexamples'] "
            "is at least params['batchSize'] and that the data is not empty")

    return loss_sum / batches, acc_sum / batches, predictions, labels


def trainModel(p):
    """Train and validate a ``Model`` and return the per-epoch metrics.

    Args:
        p: dict of settings that override the defaults below. It must provide
            ``"trainData"``, ``"testData"`` and ``"architecture"``, which
            have no default.

    Returns:
        Tuple ``(result, sess)``. ``result`` maps metric names to per-epoch
        lists (plus the elapsed training time) and is empty when
        ``epochs`` is 0. ``sess`` is the still-open session holding the
        trained variables; the caller is responsible for closing it.

    Raises:
        ValueError: if the settings lead to zero batches per pass.
    """
    # Defaults; anything in `p` takes precedence.
    params = {
        "epochs": 15,
        "batchSize": 512,
        "optimizer": tf.train.AdamOptimizer(),
        "trainexamples": 1000 * 100,
        "f1modus": "micro",
        "savelog": True,
        "path": "blobs/" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + "/",
        "pathToCache": "data/",
    }
    params.update(p)

    if params["savelog"]:
        print("saving to:", params["path"])
        # makedirs also creates a missing parent directory.
        os.makedirs(params["path"], exist_ok=True)
        with open(params["path"] + "info.txt", "w") as f:
            for k in params:
                f.write(k + ": " + str(params[k]) + "\n")

    tf.reset_default_graph()
    dataset_train = load_dataset.getData(params["trainData"], shuffle=True, batchsize=params["batchSize"], pathToCache=params["pathToCache"])
    dataset_val = load_dataset.getData(params["testData"], shuffle=False, batchsize=params["batchSize"], pathToCache=params["pathToCache"])

    if params["trainexamples"] is not None:
        batches_per_pass = int(params["trainexamples"] / params["batchSize"])
        if batches_per_pass < 1:
            # Fail before the (slow) model construction instead of dividing
            # by zero after an empty pass.
            raise ValueError("params['trainexamples'] must be at least params['batchSize']")
        dataset_train = dataset_train.take(batches_per_pass)
        dataset_val = dataset_val.take(batches_per_pass)

    # One reinitializable iterator feeds the model; the two initializer ops
    # switch it between training and validation data.
    iterator = tf.data.Iterator.from_structure(dataset_train.output_types, dataset_train.output_shapes)
    train_iterator = iterator.make_initializer(dataset_train)
    val_iterator = iterator.make_initializer(dataset_val)
    text_input, label = iterator.get_next()

    model = Model(text_input, label, params)

    init_op = tf.group([tf.local_variables_initializer(), tf.global_variables_initializer(), tf.tables_initializer()])
    sess = tf.Session()

    # Defined up front so that epochs == 0 still returns a value.
    result = {}
    loss_hist_epoch, acc_hist_epoch, val_loss_hist_epoch, val_acc_hist_epoch, f1_train_epoch, f1_val_epoch = [], [], [], [], [], []
    f1_mode = params["f1modus"]

    try:
        sess.run(init_op)

        startTime = time.time()
        for epoch in tqdm(range(params["epochs"])):
            # Predictions and labels are collected per pass, so each F1 score
            # describes this epoch only.
            train_loss, train_acc, train_predictions, train_labels = _run_pass(
                sess, model, train_iterator, params, training=True)
            train_f1 = f1_score(train_labels, train_predictions, average=f1_mode)
            loss_hist_epoch.append(train_loss)
            acc_hist_epoch.append(train_acc)
            f1_train_epoch.append(train_f1)

            val_loss, val_acc, val_predictions, val_labels = _run_pass(
                sess, model, val_iterator, params, training=False)
            val_f1 = f1_score(val_labels, val_predictions, average=f1_mode)
            val_loss_hist_epoch.append(val_loss)
            val_acc_hist_epoch.append(val_acc)
            f1_val_epoch.append(val_f1)

            print(
                '\n\tEpoch {}: train_loss: {:.4f}, train_acc: {:.4f}, train_{}-f1: {:.4f} || val_loss: {:.4f}, val_acc: {:.4f}, val_{}-f1: {:.4f}'.format(
                    epoch + 1, train_loss, train_acc, f1_mode, train_f1,
                    val_loss, val_acc, f1_mode, val_f1))

            # Epoch finished - update and save results
            trainingTime = time.time() - startTime
            result = {
                "loss_hist_epoch": loss_hist_epoch,
                "acc_hist_epoch": acc_hist_epoch,
                "val_loss_hist_epoch": val_loss_hist_epoch,
                "val_acc_hist_epoch": val_acc_hist_epoch,
                "f1_train_epoch": f1_train_epoch,
                "f1_val_epoch": f1_val_epoch,
                "train_time_seconds": trainingTime,
                "train_time_minutes": trainingTime / 60,
            }

            if params["savelog"]:
                # Rewritten after every epoch so an interrupted run still
                # leaves the latest numbers on disk.
                with open(params["path"] + "result.txt", "w") as f:
                    for k in result:
                        f.write(k + ": " + str(result[k]) + "\n")

                # save plots
                # NOTE(review): plotResults is not defined in the visible
                # cells; it must exist in the notebook namespace when
                # savelog is enabled.
                path = params["path"]
                print("saving results to:", path)
                a = params["architecture"]
                plotResults(loss_hist_epoch, "train_loss", val_loss_hist_epoch, "val_loss", str(a) + " loss", path, a)
                plotResults(acc_hist_epoch, "acc_train", val_acc_hist_epoch, "acc_val", str(a) + " acc", path, a)
                plotResults(f1_train_epoch, "f1_train", f1_val_epoch, "f1_val", str(a) + " f1", path, a)
    except BaseException:
        # The session is only handed to the caller on success; release it
        # (and its device memory) on any failure or interrupt.
        sess.close()
        raise

    return result, sess
  
  
# Train with the notebook-level `params`. The returned session is kept open
# because the following cells export its graph and variables.
result, sess = trainModel(params)

data/cache/amazon_reviews_multilingual_US_v1_00.tsv.shuffled.csv already exists. Using cached data
data/cache/amazon_reviews_multilingual_DE_v1_00.tsv.shuffled.csv already exists. Using cached data
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0614 15:54:18.041616 140022028396416 saver.py:1483] Saver not created because there are no variables in the graph to restore

  0%|          | 0/1 [00:00<?, ?it/s][A

  0%|          | 0/1000 [00:00<?, ?it/s][A[A

  0%|          | 0/1000 [00:02<?, ?it/s, (3.6507633, 0.037109375)][A[A

 51%|█████     | 512/1000 [00:02<00:02, 184.65it/s, (3.6507633, 0.037109375)][A[A

[A[A

  0%|          | 0/1000 [00:00<?, ?it/s][A[A

  0%|          | 0/1000 [00:02<?, ?it/s, (3.6521745, 0.044921875)][A[A

 51%|█████     | 512/1000 [00:02<00:02, 205.19it/s, (3.6521745, 0.044921875)][A[A

[A[A
100%|██████████| 1/1 [00:07<00:00,  7.70s/it][A
[A


	Epoch 1: train_loss: 3.6508, train_acc: 0.0371, train_micro-f1: 0.0371 || val_loss: 3.6522, val_acc: 0.0449, val_micro-f1: 0.0449
CPU times: user 39.5 s, sys: 2.06 s, total: 41.6 s
Wall time: 41.3 s


In [20]:
!rm -rf /content/SavedModel

builder = tf.saved_model.builder.SavedModelBuilder('/content/SavedModel/')

builder.add_meta_graph_and_variables(sess,
                                       [tf.saved_model.tag_constants.TRAINING],
                                       signature_def_map=None,
                                       assets_collection=None)
builder.save()  

!ls -alh /content/SavedModel

INFO:tensorflow:No assets to save.


I0614 15:54:35.977722 140022028396416 builder_impl.py:629] No assets to save.


INFO:tensorflow:No assets to write.


I0614 15:54:35.980166 140022028396416 builder_impl.py:449] No assets to write.


INFO:tensorflow:SavedModel written to: /content/SavedModel/saved_model.pb


I0614 15:54:50.630439 140022028396416 builder_impl.py:414] SavedModel written to: /content/SavedModel/saved_model.pb


total 161M
drwxr-xr-x 3 root root 4.0K Jun 14 15:54 .
drwxr-xr-x 1 root root 4.0K Jun 14 15:54 ..
-rw-r--r-- 1 root root 161M Jun 14 15:54 saved_model.pb
drwxr-xr-x 2 root root 4.0K Jun 14 15:54 variables


In [25]:
!rm -rf /content/Saver
saver = tf.train.Saver()
saver.save(sess, '/content/Saver/iter', global_step=23)
saver.save(sess, '/content/Saver/iter', global_step=66)
!ls -alh /content/Saver/

total 1.2G
drwxr-xr-x 2 root root 4.0K Jun 14 15:58 .
drwxr-xr-x 1 root root 4.0K Jun 14 15:58 ..
-rw-r--r-- 1 root root  154 Jun 14 15:58 checkpoint
-rw-r--r-- 1 root root 444M Jun 14 15:58 iter-23.data-00000-of-00001
-rw-r--r-- 1 root root  46K Jun 14 15:58 iter-23.index
-rw-r--r-- 1 root root 164M Jun 14 15:58 iter-23.meta
-rw-r--r-- 1 root root 444M Jun 14 15:58 iter-66.data-00000-of-00001
-rw-r--r-- 1 root root  46K Jun 14 15:58 iter-66.index
-rw-r--r-- 1 root root 164M Jun 14 15:58 iter-66.meta
