In [None]:
# Disabled install commands. They sit inside a bare string literal, so running
# this cell installs nothing; the second line pins transformers to 2.9.0.
'''
!pip3 install transformers
pip3 install transformers==2.9.0
'''

In [3]:
from transformers import AutoTokenizer, TFAutoModel
from transformers import BartForSequenceClassification, BartTokenizer
from torch.nn import functional as F
import tensorflow as tf
import numpy as np
import operator
import pprint

import pickle
import dill

In [4]:
from ipywidgets import IntProgress

In [5]:
# Load the tokenizer and the TensorFlow model for the same
# 'deepset/sentence_bert' checkpoint. from_pt=True is passed so the TF model
# is built from the checkpoint's PyTorch weights.
tokenizer = AutoTokenizer.from_pretrained('deepset/sentence_bert')
tf_model = TFAutoModel.from_pretrained('deepset/sentence_bert', from_pt=True)

In [6]:
!ls

README.md      [34mdeploy[m[m         model.pt       [34mtf_code[m[m        [34mtourch_code[m[m
[34mback[m[m           email-pt.ipynb model.pth      tf_model.h5
config.json    email-tf.ipynb [34mmodel_files[m[m    tfmodel.pkl


In [None]:
# Candidate labels that the sample text is ranked against.
labels = ['unhappy', 'happy', 'positive', 'negative', 'neutral']

# Sample customer e-mail used as the text to label.
sentence = '''
Hi, 
I had very good experience in using the service.
I have recently made use of Virtual Shopping to help me to rearrang home delivery for my order.
Because I want to change the delivery date, can you share the instructions.
Kindly regards,
Lu
'''


In [None]:
# Tokenize the sentence together with every candidate label as one padded
# batch of TensorFlow tensors: row 0 is the sentence, the labels follow.
inputs = tokenizer.batch_encode_plus(
    [sentence] + labels,
    return_tensors='tf',
    pad_to_max_length=True,
)
input_ids, attention_mask = inputs['input_ids'], inputs['attention_mask']

In [None]:
# Run the whole batch through the model loaded from the hub.
tf_outputs = tf_model(inputs)

response = 'Sentence submitted for labelling \n' + str(sentence)+'\n'

# One vector per text: average the first model output over axis 1
# (presumably the token axis — confirm). Row 0 is treated as the sentence and
# rows 1.. as the labels.
label_emb = tf.reduce_mean(tf_outputs[0][1:], 1)
sentence_emb = tf.reduce_mean(tf_outputs[0][:1], 1)

# tf.keras.losses.CosineSimilarity is a *loss*: it returns the NEGATIVE cosine
# similarity (-1 means identical direction). Negate it so that `similarities`
# really holds similarities, where larger means closer.
cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, reduction=tf.keras.losses.Reduction.NONE)
similarities = (-cosine_loss(sentence_emb, label_emb)).numpy()

result = dict(zip(labels, similarities))
# Best-matching label first (same order the negated values gave when sorted
# ascending).
sorted_d = sorted(result.items(), key=operator.itemgetter(1), reverse=True)
pprint.pprint(sorted_d)


In [None]:
"""
sentence_emb = tf_outputs[1][:1]
label_emb = tf_outputs[1][1:]
print(sentence_emb.shape)
print(label_emb.shape)

cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, reduction=tf.keras.losses.Reduction.NONE)
similarities = cosine_loss(sentence_emb, label_emb).numpy()

result = dict(zip(labels, similarities))
sorted_d = sorted(result.items(), key=operator.itemgetter(1))
pprint.pprint(sorted_d)
"""


In [None]:
# Bare expression: shows the repr of the loaded model as the cell output.
tf_model

In [None]:
!ls

In [None]:
# pickle.load() needs an open binary file object; passing the path string
# raises TypeError.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open('tfmodel.pkl', 'rb') as pkl_file:
    md = pickle.load(pkl_file)

In [None]:
# Write the in-memory model to the local model_files directory so it can be
# reloaded from disk later.
# NOTE(review): absolute, user-specific path — will not exist on other machines.
tf_model.save_pretrained('/Users/napt/Workspace/gft/model-serving-examples/aiplatform/model_files/')

In [None]:
!ls model_files
from tensorflow import keras
import os
# Reload the model from the directory path below, to check that it can be
# loaded from local files instead of the hub.
# NOTE(review): absolute, user-specific path — will not exist on other machines.
model_dir = '/Users/napt/Workspace/gft/model-serving-examples/aiplatform/model_files/'
model = TFAutoModel.from_pretrained(model_dir)

In [None]:
# Same ranking as before, but with the model reloaded from local files.
tf_outputs = model(inputs)

response = 'Sentence submitted for labelling \n' + str(sentence)+'\n'

# One vector per text: average the first model output over axis 1
# (presumably the token axis — confirm). Row 0 is treated as the sentence and
# rows 1.. as the labels.
label_emb = tf.reduce_mean(tf_outputs[0][1:], 1)
sentence_emb = tf.reduce_mean(tf_outputs[0][:1], 1)

# tf.keras.losses.CosineSimilarity is a *loss*: it returns the NEGATIVE cosine
# similarity (-1 means identical direction). Negate it so that `similarities`
# really holds similarities, where larger means closer.
cosine_loss = tf.keras.losses.CosineSimilarity(axis=1, reduction=tf.keras.losses.Reduction.NONE)
similarities = (-cosine_loss(sentence_emb, label_emb)).numpy()

result = dict(zip(labels, similarities))
# Best-matching label first (same order the negated values gave when sorted
# ascending).
sorted_d = sorted(result.items(), key=operator.itemgetter(1), reverse=True)
pprint.pprint(sorted_d)

In [None]:
# from_pretrained() takes a model id or the directory written by
# save_pretrained() (config.json next to tf_model.h5), not the path of the
# weights file itself — pointing it at the .h5 leaves it without a config.
tf_model = TFAutoModel.from_pretrained('/Users/napt/Workspace/gft/model-serving-examples/aiplatform/model_files/')

In [None]:
from transformers import AutoTokenizer, TFAutoModel
from transformers import BartForSequenceClassification, BartTokenizer
import tensorflow as tf
import numpy as np



class EmailClassifier(object):
    """Rank a fixed set of sentiment labels against a text (PyTorch variant).

    The text and every label are pushed through the wrapped model in one
    batch, each row of the first model output is averaged over dim 1, and the
    labels are ordered by cosine similarity to the text.

    ``predict`` reads a module-level ``tokenizer``; it must be defined before
    ``predict`` is called.
    """

    def __init__(self, model):
        """Keep ``model`` (the callable used in ``predict``) and the labels."""
        self._model = model
        self.class_lables = ['unhappy', 'happy', 'positive', 'negative', 'neutral']

    @classmethod
    def from_path(cls, model_dir):
        """Build a classifier from the model file stored in ``model_dir``.

        Args:
            model_dir: Directory containing the file ``tf_model.h5``.

        Returns:
            An ``EmailClassifier`` wrapping the object returned by
            ``torch.load``.
        """
        # Imported locally: the original body referenced `torch` without any
        # import of it, so calling from_path raised NameError.
        import os
        import torch

        model_file = os.path.join(model_dir, 'tf_model.h5')
        # NOTE(review): torch.load only reads files written by torch.save.
        # The name 'tf_model.h5' suggests a Keras/HDF5 export — confirm this
        # file really is a torch checkpoint.
        model = torch.load(model_file)
        return cls(model)

    def predict(self, sentence, **kwargs):
        """Score every class label against ``sentence``.

        Args:
            sentence: Text to classify.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            A list of ``(label, cosine_similarity)`` tuples sorted from the
            most to the least similar label.
        """
        import torch

        inputs = tokenizer.batch_encode_plus([sentence] + self.class_lables,
                                             return_tensors='pt',
                                             pad_to_max_length=True)
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = self._model(input_ids, attention_mask=attention_mask)[0]
        # Row 0 is the sentence, the remaining rows are the labels.
        sentence_emb = output[:1].mean(dim=1)
        label_emb = output[1:].mean(dim=1)

        similarities = F.cosine_similarity(sentence_emb, label_emb)

        result = dict(zip(self.class_lables, similarities.tolist()))
        predicted = sorted(result.items(), key=operator.itemgetter(1),
                           reverse=True)

        return predicted

In [2]:
# Example of predictor class, this is also available in tf_code folder.
from transformers import AutoTokenizer, TFAutoModel
from transformers import BartForSequenceClassification, BartTokenizer
import tensorflow as tf


class MyPredictor(object):
    """An example Predictor for an AI Platform custom prediction routine.

    Each input text is batched with the class labels, run through the wrapped
    TensorFlow model, averaged over axis 1 of the first model output, and the
    labels are scored by cosine similarity to the text.
    """

    # Hub name of the tokenizer loaded when none is passed to __init__.
    _TOKENIZER_NAME = 'deepset/sentence_bert'

    def __init__(self, model, tokenizer=None):
        """Store the model and the label set.

        Args:
            model: Callable TF model; called with the tokenizer's batch output.
            tokenizer: Optional tokenizer to use. When omitted, the
                'deepset/sentence_bert' tokenizer is loaded on first use.
        """
        self._model = model
        self._tokenizer = tokenizer
        self.class_lables = ['unhappy', 'happy', 'positive', 'negative',
                             'neutral']

    def _get_tokenizer(self):
        """Return the tokenizer, loading it once instead of on every call."""
        if self._tokenizer is None:
            self._tokenizer = AutoTokenizer.from_pretrained(self._TOKENIZER_NAME)
        return self._tokenizer

    def _score(self, text):
        """Return ``{label: cosine similarity}`` for one text."""
        # 'tf' tensors: the wrapped model is a TensorFlow model (see from_path).
        inputs = self._get_tokenizer().batch_encode_plus(
            [text] + self.class_lables,
            return_tensors='tf',
            pad_to_max_length=True)
        tf_outputs = self._model(inputs)

        # Row 0 is the text, rows 1.. are the labels.
        label_emb = tf.reduce_mean(tf_outputs[0][1:], 1)
        sentence_emb = tf.reduce_mean(tf_outputs[0][:1], 1)

        cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
                                                       reduction=tf.keras.losses.Reduction.NONE)

        # The Keras loss is the NEGATIVE cosine similarity; negate it so the
        # returned numbers are similarities (larger means closer).
        similarities = (-cosine_loss(sentence_emb, label_emb)).numpy()

        # Plain floats so the result is JSON serializable.
        return {label: float(score)
                for label, score in zip(self.class_lables, similarities)}

    def predict(self, instances, **kwargs):
        """Score the class labels for the given text(s).

        Args:
            instances: Either a single string or an iterable of strings.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            For a single string, one ``{label: similarity}`` dict. For an
            iterable of strings, a list with one such dict per input, in
            input order.
        """
        if isinstance(instances, str):
            return self._score(instances)
        return [self._score(text) for text in instances]

    @classmethod
    def from_path(cls, model_dir):
        """Create a predictor from a model directory via TFAutoModel."""
        model = TFAutoModel.from_pretrained(model_dir)
        return cls(model)


In [None]:
# Same ranking, computed with scikit-learn's cosine_similarity.

response = 'Sentence submitted for labelling \n' + str(sentence)+'\n'

# Row 0 of the first model output is used as the sentence, rows 1.. as the
# labels; each is averaged over axis 1.
sentence_emb = tf.reduce_mean(tf_outputs[0][:1], 1)
label_emb = tf.reduce_mean(tf_outputs[0][1:], 1)


from sklearn.metrics.pairwise import cosine_similarity

cosine_sim = cosine_similarity(
    sentence_emb.numpy(),
    label_emb.numpy(),
).flatten()

# Walk the sorted indices backwards to take the len(labels) best scores,
# highest first.
related_doc_indices = cosine_sim.argsort()[:-len(labels)-1:-1]
for ind in related_doc_indices:
    print(
        'label: ' + labels[ind],
        '    \t similarity: ' + str(round(cosine_sim[ind], 2)),
    )