In [None]:
!pip install -q tensorflow

In [None]:
import os

import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [None]:
# Notebook-wide constants.
# MAX_WORDS is passed as `num_words` when the IMDB dataset is loaded and is
# the upper bound checked when new text is encoded.
MAX_WORDS = 10000
# Human-readable class names.
# NOTE(review): presumably list position matches the integer label
# (0 -> 'negative', 1 -> 'positive'); confirm against the dataset docs.
REVIEW_CLASSES = ['negative', 'positive']

In [None]:
## download dataset from keras.

# Restrict the vocabulary to the MAX_WORDS most frequent words.
# The underscore-prefixed names hold the raw splits exactly as returned by
# the loader; padded inputs and one-hot labels are derived from them.
(_X_train, _y_train), (_X_test, _y_test) = keras.datasets.imdb.load_data(num_words=MAX_WORDS)

In [None]:
## check the data
# Print the shapes of the raw training inputs and labels.
print("X_train shape: {}\ny_train shape:{}".format(_X_train.shape, _y_train.shape))
# NOTE(review): this prints the type of the `.shape` attribute, not of the
# array itself -- possibly `type(_X_train)` was intended; confirm.
print(type(_X_train.shape))
# Display the first two encoded reviews as the cell output.
_X_train[:2]

## Reverse Word Index

In [None]:
# word_index: token (str) -> id (int).
# Every id from the dataset's own index is shifted up by 3 so that ids 0-3
# are free for the special marker tokens, which are added after the words.
word_index = {
    **{
        word: rank + 3
        for word, rank in tf.keras.datasets.imdb.get_word_index().items()
    },
    "<PAD>": 0,
    "<START>": 1,
    "<UNK>": 2,
    "<UNUSED>": 3,
}

# reverse_word_index: id (int) -> token (str), the inverse of word_index.
reverse_word_index = {idx: word for word, idx in word_index.items()}


def decode_review(text):
    """Turn a sequence of token ids back into a space-separated string.

    Each id is looked up in the module-level ``reverse_word_index``; ids
    that have no entry are rendered as ``'#'``.
    """
    tokens = (reverse_word_index.get(token_id, '#') for token_id in text)
    return ' '.join(tokens)

# <str> to <int>
def encode_review(text):
    """Encode a raw review string as a list of integer token ids.

    The text is split on single spaces and the result always begins with
    the ``<START>`` id. Lookups use the module-level ``word_index``:

    * words missing from ``word_index`` become ``<UNK>``;
    * words whose id is ``MAX_WORDS`` or larger become ``<UNUSED>``, so
      every returned id lies in ``0 .. MAX_WORDS - 1``.

    Parameters
    ----------
    text : str
        Review text; tokens are separated by single spaces.

    Returns
    -------
    list of int
        ``[<START> id, id of word 1, id of word 2, ...]``.
    """
    unk_id = word_index["<UNK>"]
    unused_id = word_index["<UNUSED>"]

    ids = [word_index["<START>"]]
    for w in text.split(' '):
        v = word_index.get(w, unk_id)
        # Valid ids are 0 .. MAX_WORDS - 1, so an id equal to MAX_WORDS is
        # already out of range (the previous `>` test let it through).
        # NOTE(review): out-of-vocabulary ids are mapped to <UNUSED> here,
        # whereas unknown words map to <UNK>; confirm this split is intended.
        if v >= MAX_WORDS:
            v = unused_id
        ids.append(v)
    return ids

## Word Embeddings

In [None]:
# Bring every review to a fixed length of 256 ids, filling at the end
# ('post') with the <PAD> id, for both the train and the test split.
X_train, X_test = (
    keras.preprocessing.sequence.pad_sequences(
        sequences,
        maxlen=256,
        dtype='int32',
        padding='post',
        value=word_index["<PAD>"],
    )
    for sequences in (_X_train, _X_test)
)

# Classification targets: expand each integer label into a 2-wide one-hot row.
y_train, y_test = (tf.one_hot(labels, depth=2) for labels in (_y_train, _y_test))

print("X: ", X_train.shape, X_train.dtype, X_test.dtype)

In [None]:
# model setting
# Embedding -> average over the sequence -> small dense layer -> 2-way output.
# The embedding's vocabulary size is MAX_WORDS, the same limit used when the
# dataset was loaded, so the two cannot drift apart.
# NOTE(review): no masking is configured, so padded positions presumably take
# part in the average; confirm this is acceptable.
model = tf.keras.Sequential([
            tf.keras.layers.Embedding(MAX_WORDS, 8),
            tf.keras.layers.GlobalAvgPool1D(),
            tf.keras.layers.Dense(6, activation="relu"),
            # The targets are one-hot rows over two mutually exclusive classes,
            # so use softmax: the two outputs then form a probability
            # distribution (they sum to 1), which is what predict_proba reports.
            tf.keras.layers.Dense(2, activation="softmax"),
        ])


# categorical_crossentropy is the loss that pairs with softmax outputs and
# one-hot targets; 'accuracy' then compares the argmax of prediction and label.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

## Train the Model

In [None]:
# Train on the padded reviews and one-hot labels: 30 passes, 512 reviews per batch.
model.fit(x=X_train, y=y_train, batch_size=512, epochs=30)

### Create the prediction function

In [None]:
def predict_proba(model, texts, word_index):
    """Score a batch of raw review strings with ``model``.

    Parameters
    ----------
    model : callable
        Called with the padded id matrix; its result must offer ``.numpy()``.
    texts : iterable of str
        Reviews to score.
    word_index : dict
        Only its ``"<PAD>"`` entry is read here, as the padding value.
        Note that ``encode_review`` looks words up in the module-level
        ``word_index``, not in this argument.

    Returns
    -------
    list
        The model output converted to nested Python lists, one row per text.
    """
    encoded = [encode_review(review) for review in texts]
    # Same fixed length (256) and end-padding as used for the training data.
    padded = keras.preprocessing.sequence.pad_sequences(
        encoded,
        maxlen=256,
        dtype="int32",
        padding='post',
        value=word_index["<PAD>"],
    )
    scores = model(padded)
    return scores.numpy().tolist()

In [None]:
# Smoke-test the prediction function on three short example texts;
# the nested list it returns is shown as the cell output.
predict_proba(model, ['it is funfunnyny.', 'just so good', 'oh, bad'], word_index)

# Unbox

In [None]:
import unboxapi

# Read the API key from the UNBOX_API_KEY environment variable so the real
# key never has to be pasted into (and saved with) the notebook. The original
# placeholder is kept as the fallback when the variable is not set.
client = unboxapi.UnboxClient(os.environ.get("UNBOX_API_KEY", "YOUR_API_KEY_HERE"))

# Package & Upload to Unbox

### Upload dataset

In [None]:
def _strip_special_tokens(decoded):
    """Delete the vocabulary's marker tokens from a decoded review and trim it."""
    for token in ("<PAD>", "<START>", "<UNK>", "<UNUSED>"):
        decoded = decoded.replace(token, "")
    return decoded.strip()


# Decode every raw test review back into plain text.
text_data = [_strip_special_tokens(decode_review(indices)) for indices in _X_test]

# Collapse the one-hot test labels back to integer class ids.
labels = y_test.numpy().argmax(axis=1).tolist()

# Shuffle all rows with a fixed seed, then keep the first 500.
data_dict = {"text": text_data, "labels": labels}
df = pd.DataFrame(data_dict).sample(frac=1, random_state=1)[:500]

In [None]:
# Register the sampled test reviews with Unbox.
# class_names reuses REVIEW_CLASSES so the label names live in one place
# instead of being repeated as a literal.
dataset = client.add_dataframe(
    df=df,
    class_names=REVIEW_CLASSES,
    label_column_name='labels',
    text_column_name='text',
    name="Tensorflow Validation Data",
    description='this is my sentiment validation dataset'
)
# Show the dict representation of the returned dataset object.
dataset.to_dict()

### Upload model

In [None]:
from unboxapi.models import ModelType

# Register the trained model together with its prediction function.
# class_names reuses REVIEW_CLASSES so it stays identical to the dataset upload.
# NOTE(review): word_index is handed over as an extra keyword argument,
# presumably so Unbox forwards it to predict_proba; confirm against the client docs.
unbox_model = client.add_model(
    function=predict_proba,
    model=model,
    model_type=ModelType.tensorflow,
    class_names=REVIEW_CLASSES,
    name='TF Sentiment Model',
    description='this is my tensorflow sentiment model',
    word_index=word_index
)
# Show the dict representation of the returned model object.
unbox_model.to_dict()