In [0]:
# Install the extra packages this notebook relies on into the Colab runtime.
# tf-sentencepiece is imported further down as `tf_sentencepiece`; matplotlib
# is used by the plotting cells.
# NOTE(review): versions are unpinned — pin them once a known-good combination
# has been confirmed, otherwise a re-run may pick up incompatible releases.
!pip3 install sentencepiece
!pip3 install tf-sentencepiece
!pip3 install matplotlib
# NOTE(review): the cells below use TF1 graph-mode APIs (tf.Session,
# tf.train.AdamOptimizer, tf.layers), so a TensorFlow 1.x runtime is needed.
# Uncomment the next line to force one.
#!pip install tensorflow==1.13.1

In [0]:
import os
import sys

from google.colab import drive

# Make Google Drive visible inside the Colab VM.
drive.mount('/content/gdrive')

# Project root on Drive. The working directory is switched to its data/
# sub-folder and the folder contents are printed as a quick sanity check.
gitDir = "/content/gdrive/My Drive/nlp/"
data_dir = gitDir + "data/"
os.chdir(data_dir)
print(os.listdir("."))

# Alternative to chdir (presumably so that `amazon_multilingual` can be
# imported from data/ — confirm): put the folder on sys.path instead.
#sys.path.insert(0,gitDir + "data")

In [0]:
import datetime

import tensorflow as tf
print(tf.__version__)
import tensorflow_hub as hub
import numpy as np
import tf_sentencepiece

import amazon_multilingual
from tensorflow.keras.layers import *
from tensorflow.keras.models import *

from tqdm import tqdm

# Report the runtime configuration: library versions, whether eager execution
# is on, and whether TensorFlow can see a GPU.
runtime_info = [
    ("Version: ", tf.__version__),
    ("Eager mode: ", tf.executing_eagerly()),
    ("Hub version: ", hub.__version__),
]
for caption, value in runtime_info:
    print(caption, value)

gpu_status = "available" if tf.test.is_gpu_available() else "NOT AVAILABLE"
print("GPU is", gpu_status)

## create graph

In [0]:
# Mini-batch size. Passed to both amazon_multilingual.getData calls below and
# used as the progress-bar increment per step in the training loop.
batchSize = 256

In [0]:
# Training split: "UK" reviews, shuffled with a buffer of four batches.
# getData returns the dataset, the feed_dict that is supplied when the iterator
# is initialised on it, and a length used as the progress-bar total
# (presumably the number of examples — confirm against amazon_multilingual).
dataset_train, feed_dict_train, length_train = amazon_multilingual.getData(
    "UK",
    shuffle=True,
    buffer=batchSize * 4,
    batchsize=batchSize,
)

In [0]:
# Validation split: "DE" reviews, unshuffled. Same return triple as the
# training call: dataset, feed_dict for iterator initialisation, and a length
# used as the progress-bar total.
dataset_val, feed_dict_val, length_val = amazon_multilingual.getData(
    "DE",
    shuffle=False,
    buffer=batchSize,
    batchsize=batchSize,
)

In [0]:
# One iterator defined only by the element structure of the training dataset.
# It is pointed at the training or validation dataset by running the matching
# initializer op, so a single model graph serves both.
iterator = tf.data.Iterator.from_structure(
    output_types=dataset_train.output_types,
    output_shapes=dataset_train.output_shapes,
)

# Despite their names these are initializer *ops* (run with sess.run), not
# iterators; the names are kept because later cells refer to them.
train_iterator = iterator.make_initializer(dataset_train)
val_iterator = iterator.make_initializer(dataset_val)

In [0]:
class Model:
    """Single dense classification layer on top of a sentence-embedding module.

    Constructing an instance builds the TF1 graph ops and exposes them as
    attributes:

    * ``logits``      - unnormalised class scores from ``forward``
    * ``loss``        - mean softmax cross-entropy over the batch
    * ``train_op``    - one Adam step minimising ``loss``
    * ``predictions`` - argmax class id per example
    * ``accuracy``    - fraction of correct predictions in the current batch

    Args:
        data_X: input tensor handed to the embedding module (the text batch).
        data_Y: integer class ids; one-hot encoded for the loss and cast to
            int64 for the accuracy comparison.
        xling: optional, already-loaded embedding module. When ``None`` the
            universal-sentence-encoder-xling-many module is loaded from
            TF Hub with ``trainable=False``.
        n_class: number of output classes (defaults to the original 35).
    """

    def __init__(self, data_X, data_Y, xling=None, n_class=35):
        self.n_class = n_class
        if xling is None:
            print("Downloading xling...")
            self.xling = hub.Module(
                "https://tfhub.dev/google/universal-sentence-encoder-xling-many/1",
                trainable=False,
            )
        else:
            self.xling = xling
        self.create_architecture(data_X, data_Y)

    def create_architecture(self, data_X, data_Y):
        """Create the loss, optimiser, prediction and accuracy ops."""
        y_hot = tf.one_hot(data_Y, depth=self.n_class)
        self.logits = self.forward(data_X)

        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_hot, logits=self.logits)
        )
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss)

        self.predictions = tf.argmax(self.logits, 1)

        # Per-batch accuracy. The labels are cast to int64 so their dtype
        # matches the predictions in tf.equal.
        correct = tf.equal(self.predictions, tf.cast(data_Y, tf.int64))
        self.accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    def forward(self, X):
        """Embed ``X`` with the sentence encoder and project to class logits."""
        # Use the argument, not the notebook-level `text_input` global, so the
        # model is wired to whatever input tensor it was constructed with.
        embedded_text = self.xling(X)
        output = tf.layers.dense(embedded_text, self.n_class)
        return output

## train

In [0]:
# Load the sentence-encoder module once at notebook level so the same instance
# can be handed to Model instead of being loaded again inside it.
xling_url = "https://tfhub.dev/google/universal-sentence-encoder-xling-many/1"
xling = hub.Module(xling_url, trainable=False)

In [0]:
# Symbolic "next batch" of the shared iterator; each element is unpacked into
# the text tensor and the label tensor.
next_batch = iterator.get_next()
text_input, label = next_batch

# Build the classifier graph on top of those tensors, reusing the encoder
# loaded above.
model = Model(data_X=text_input, data_Y=label, xling=xling)

In [0]:
# Group every initializer the graph needs (local variables, global variables,
# lookup tables) into one op and run it once in a fresh session.
initializers = [
    tf.local_variables_initializer(),
    tf.global_variables_initializer(),
    tf.tables_initializer(),
]
init_op = tf.group(initializers)

sess = tf.Session()
sess.run(init_op)

In [0]:
%%time

epochs = 20

loss_hist, acc_hist, val_loss_hist, val_acc_hist = [], [], [], []
loss_hist_epoch, acc_hist_epoch, val_loss_hist_epoch, val_acc_hist_epoch = [], [], [], []

for epoch in range(epochs):
  print('\nEpoch: {}'.format(epoch + 1))
  train_loss, train_accuracy = 0, 0
  val_loss, val_accuracy = 0, 0  
  counter = 0
  
  sess.run(train_iterator, feed_dict=feed_dict_train)
  
  try:
    with tqdm(total = length_train) as pbar:
      while True:
        _, a, l = sess.run([model.train_op, model.accuracy, model.loss])
        #print(a,l)
        train_loss += l
        train_accuracy += a
        loss_hist.append(l)
        acc_hist.append(a)
        pbar.set_postfix_str((l, a))
        pbar.update(batchSize)
        counter += 1
  except tf.errors.OutOfRangeError:
     pass
     print("\tfinished after", counter, "batches.")
      
  
  loss_hist_epoch.append(train_loss / counter)
  acc_hist_epoch.append(train_accuracy / counter)
  #print('\nEpoch: {}'.format(epoch + 1))
  print('Train accuracy: {:.4f}, loss: {:.4f}'.format(loss_hist_epoch[-1], acc_hist_epoch[-1]))
      
  
  # Validation
  counter = 0
  sess.run(val_iterator, feed_dict=feed_dict_val) 
  try:
    with tqdm(total = length_val) as pbar:
      while True:
        a, l = sess.run([model.accuracy, model.loss])
        val_loss += l
        val_accuracy += a
        val_loss_hist.append(l)
        val_acc_hist.append(a)
        pbar.set_postfix_str((l, a))
        pbar.update(batchSize)
        counter += 1
  except tf.errors.OutOfRangeError:
     pass
     print("\tfinished after", counter, "batches.")
           
  
  val_loss_hist_epoch.append(val_loss / counter)
  val_acc_hist_epoch.append(val_hist / counter)
  print('Val accuracy: {:.4f}, loss: {:.4f}\n'.format(val_loss_hist_epoch[-1], val_acc_hist_epoch[-1]))


In [0]:
import matplotlib.pyplot as plt

title = "UK->DE"

# One timestamp for the whole cell. The metric name is appended to the file
# name so the loss and accuracy figures cannot overwrite each other (both used
# to get the same second-resolution timestamp as their only file name).
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Per-epoch curves: mean training vs. validation value for each epoch.
for metric, train_series, val_series in (
    ("loss", loss_hist_epoch, val_loss_hist_epoch),
    ("acc", acc_hist_epoch, val_acc_hist_epoch),
):
    fig, ax = plt.subplots()
    ax.plot(train_series, label="train_" + metric)
    ax.plot(val_series, label="val_" + metric)
    ax.set_xlabel("epoch")
    ax.set_ylabel(metric)
    ax.legend()
    ax.set_title(title)

    figpath = '/content/gdrive/My Drive/nlp/blobs/' + timestamp + "_epoch_" + metric + ".png"
    print(figpath)
    # Save before show(): the inline backend closes the figure once it is shown.
    fig.savefig(figpath)
    plt.show()

In [0]:
import matplotlib.pyplot as plt

title = ""

# One timestamp for the whole cell. The metric name is appended to the file
# name so the loss and accuracy figures cannot overwrite each other (both used
# to get the same second-resolution timestamp as their only file name).
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

# Per-batch curves. The training and validation lists get one entry per batch
# of their own pass, so the two lines generally differ in length and share the
# x axis only by batch index.
for metric, train_series, val_series in (
    ("loss", loss_hist, val_loss_hist),
    ("acc", acc_hist, val_acc_hist),
):
    fig, ax = plt.subplots()
    ax.plot(train_series, label="train_" + metric)
    ax.plot(val_series, label="val_" + metric)
    ax.set_xlabel("batch")
    ax.set_ylabel(metric)
    ax.legend()
    ax.set_title(title)

    figpath = '/content/gdrive/My Drive/nlp/blobs/' + timestamp + "_batch_" + metric + ".png"
    print(figpath)
    # Save before show(): the inline backend closes the figure once it is shown.
    fig.savefig(figpath)
    plt.show()

In [0]:
# Manual sanity check: bypass the dataset by feeding the iterator's output
# tensors directly with three hand-written examples.
fd = {text_input: ["eins", "eins", "eins"], label: [0, 30, 30]}

# Model exposes `predictions` and `accuracy` only; the streaming-metric
# attributes `acc` / `acc_op` are not defined on it, so fetching them raised
# AttributeError.
p, a = sess.run([model.predictions, model.accuracy], feed_dict=fd)
print(p, a)

## save model
see https://stackoverflow.com/questions/33759623/tensorflow-how-to-save-restore-a-model

https://cv-tricks.com/tensorflow-tutorial/save-restore-tensorflow-models-quick-complete-tutorial/

In [0]:
# List /content/testsave (long format, hidden files included).
# NOTE(review): the simple_save call targeting this directory is commented out
# in the following cell, so the directory may not exist on a fresh runtime.
!ls -alh /content/testsave

In [0]:
#tf.saved_model.simple_save(sess, "/content/testsave", inputs={"text_input": text_input, "label": label}, outputs={"prediction": model.predictions})

In [0]:
# List /content/saver (long format, hidden files included) — the directory the
# commented-out saver.save call in the following cell would write to.
!ls -alh /content/saver

In [0]:
# Create a tf.train.Saver for checkpointing the session.
# NOTE(review): the save call itself is commented out, so no checkpoint is
# written by this cell.
saver = tf.train.Saver()
#saver.save(sess, "/content/saver/model.ckpt")