# Using Pre-trained Word Embeddings

Word Embedding - Numerical representation for language

How?

*"You shall know a word by the company it keeps."* - John Rupert Firth

**Tezgüino** <- What does this word mean?

* A bottle of *Tezgüino* is on the table
* *Tezgüino* makes you drunk
* Everybody likes *Tezgüino*


How about now?

## Examples

Word2Vec

FastText

GloVe

## Let's see these in practice

In [1]:
!pip install gluonnlp

You are using pip version 10.0.1, however version 19.2.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.


In [2]:
from mxnet import gluon
from mxnet import nd
import gluonnlp as nlp
import re

In [3]:
text = " hello world \n hello nice world \n hi world \n"

We need a tokenizer to process this string

In [4]:
def simple_tokenize(source_str, token_delim=' ', seq_delim='\n'):
    """Lazily split ``source_str`` into its non-empty tokens.

    The string is split with a regular expression matching either
    ``token_delim`` or ``seq_delim``. Both are inserted into the pattern
    verbatim, so regex metacharacters in them keep their special meaning.
    Empty pieces produced by adjacent, leading or trailing delimiters are
    dropped.

    Returns a one-shot iterator over the tokens.
    """
    delimiter_pattern = token_delim + '|' + seq_delim
    pieces = re.split(delimiter_pattern, source_str)
    return (piece for piece in pieces if piece)
# Count how often each token occurs in the toy corpus.
counter = nlp.data.count_tokens(simple_tokenize(text))

In [5]:
counter

Counter({'hello': 2, 'world': 3, 'nice': 1, 'hi': 1})

In [6]:
# Build a vocabulary from the token counts. The resulting index also contains
# the special tokens '<unk>', '<pad>', '<bos>' and '<eos>' in its first slots.
vocab = nlp.Vocab(counter)

In [7]:
vocab.idx_to_token

['<unk>', '<pad>', '<bos>', '<eos>', 'world', 'hello', 'hi', 'nice']

In [8]:
# Pre-trained fastText vectors, source 'wiki.simple'
# (NOTE(review): presumably downloaded on first use - confirm).
fasttext_simple = nlp.embedding.create('fasttext', source='wiki.simple')

In [9]:
# Attach the fastText vectors to the vocabulary; lookups then go through `vocab.embedding`.
vocab.set_embedding(fasttext_simple)

In [10]:
# 'beautiful' never occurred in the toy corpus, so it is not part of this
# vocabulary; the lookup yields an all-zero vector (presumably the vector of
# the unknown token).
vocab.embedding['beautiful']


[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
<NDArray 300 @cpu(0)>

In [11]:
vocab.embedding['hello', 'world'][:, :5]


[[ 0.39567   0.21454  -0.035389 -0.24299  -0.095645]
 [ 0.10444  -0.10858   0.27212   0.13299  -0.33165 ]]
<NDArray 2x5 @cpu(0)>

## Application of Pre-trained Word Embeddings

In [12]:
# Pre-trained GloVe vectors, source 'glove.6B.50d'.
embedding = nlp.embedding.create('glove', source='glove.6B.50d')

In [13]:
# Rebuild the vocabulary from every token known to the GloVe embedding,
# then attach the vectors so that `vocab.embedding[...]` can be queried.
vocab = nlp.Vocab(nlp.data.Counter(embedding.idx_to_token))
vocab.set_embedding(embedding)

In [14]:
len(vocab.idx_to_token)

400004

In [15]:
# Round trip: token -> index, and that index -> token.
print(vocab['beautiful'])
print(vocab.idx_to_token[71424])

71424
beautiful


### Word Similarity

In [16]:
def cos_sim(x, y):
    """Cosine similarity of two vectors: dot(x, y) / (norm(x) * norm(y))."""
    norm_product = nd.norm(x) * nd.norm(y)
    return nd.dot(x, y) / norm_product

In [17]:
def norm_vecs_by_row(x, eps=1e-10):
    """Scale every row of the 2-D array ``x`` to (approximately) unit L2 norm.

    ``eps`` is added to each row's squared norm before taking the square
    root, so an all-zero row (for example a token without a pre-trained
    vector) maps to zeros instead of NaN from a 0/0 division.

    Returns an array of the same shape as ``x``.
    """
    squared_norms = nd.sum(x * x, axis=1) + eps
    return x / nd.sqrt(squared_norms).reshape((-1, 1))

def get_knn(vocab, k, word):
    """Return the ``k`` vocabulary tokens whose vectors are closest to ``word``.

    Candidates are ranked by the dot product between the query vector and the
    row-normalised embedding matrix. The first 4 rows of the matrix are left
    out of the search (NOTE(review): presumably the special tokens at the
    front of the vocabulary - confirm for the vocab in use).
    """
    skipped_rows = 4
    query = vocab.embedding[word].reshape((-1, 1))
    candidates = norm_vecs_by_row(vocab.embedding.idx_to_vec)[skipped_rows:]
    scores = nd.dot(candidates, query).squeeze()
    # Ask for k+1 hits: the best match is expected to be `word` itself.
    top_rows = nd.topk(scores, k=k+1, ret_typ='indices')
    token_ids = [int(row.asscalar()) + skipped_rows for row in top_rows]
    # Drop the first hit (the query word) and map the rest back to tokens.
    return vocab.to_tokens(token_ids[1:])

In [18]:
get_knn(vocab, 5, 'baby')

['babies', 'boy', 'girl', 'newborn', 'pregnant']

We can verify the cosine similarity of vectors of 'baby' and 'babies'.

In [19]:
cos_sim(vocab.embedding['baby'], vocab.embedding['babies'])


[0.83871305]
<NDArray 1 @cpu(0)>

Let us find the 5 most similar words of 'beautiful' from the vocabulary.

In [20]:
get_knn(vocab, 5, 'beautiful')

['lovely', 'gorgeous', 'wonderful', 'charming', 'beauty']

### Word Analogy

In [21]:
def get_top_k_by_analogy(vocab, k, word1, word2, word3):
    """Answer "word1 is to word2 as word3 is to ?" with the ``k`` best tokens.

    The target vector is vec(word2) - vec(word1) + vec(word3). Candidates are
    ranked by their dot product with the row-normalised embedding matrix,
    skipping the first 4 rows of that matrix. The three input words are not
    removed from the candidates.
    """
    skipped_rows = 4
    embedded = vocab.embedding[word1, word2, word3]
    target = (embedded[1] - embedded[0] + embedded[2]).squeeze()

    candidates = norm_vecs_by_row(vocab.embedding.idx_to_vec)[skipped_rows:]
    scores = nd.dot(candidates, target).squeeze()

    best_rows = scores.topk(k=k, ret_typ='indices')
    # Shift the row numbers back to vocabulary indices before the lookup.
    return vocab.to_tokens([int(row.asscalar()) + skipped_rows for row in best_rows])

In [22]:
get_top_k_by_analogy(vocab, 1, 'man', 'woman', 'son')

['daughter']

In [23]:
get_top_k_by_analogy(vocab, 3, 'argentina', 'messi', 'france')

['anelka', 'ribery', 'zidane']

In [24]:
get_top_k_by_analogy(vocab, 1, 'argentina', 'football', 'india')

['cricket']

In [25]:
get_top_k_by_analogy(vocab, 1, 'france', 'crepes', 'argentina')

['quesadillas']

# Text Classification with
<br>
<center><img src="https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/image/mxnet_logo_2.png" width=400></center>

In [26]:
import mxnet as mx
from mxnet import nd, autograd, gluon

## Data Download

In [27]:
# Every archive is fetched from <base_url><prefix><category><suffix>.
base_url = 'http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/'
prefix = 'reviews_'
suffix = '_5.json.gz'
folder = 'data'  # local download directory
# The position of a category in this list is later used as its class label.
# (A stray `""` used to follow 'Home_and_Kitchen'; it was silently merged into
# 'Books' by implicit string concatenation and has been removed.)
categories = [
    'Home_and_Kitchen',
    'Books',
    'CDs_and_Vinyl',
    'Movies_and_TV',
    'Cell_Phones_and_Accessories',
    'Sports_and_Outdoors',
    'Clothing_Shoes_and_Jewelry',
]
# Create the download directory (-p: no error if it already exists).
!mkdir -p $folder
for category in categories:
    print(category)
    url = base_url+prefix+category+suffix
    # wget flags: -P sets the target directory, -nc skips files that are
    # already present, -nv keeps the output terse.
    !wget -P $folder $url -nc -nv

Home_and_Kitchen
Books
CDs_and_Vinyl
Movies_and_TV
Cell_Phones_and_Accessories
Sports_and_Outdoors
Clothing_Shoes_and_Jewelry


## Load and Preprocess Data

In [28]:
import pandas as pd
import gzip

def parse(path):
    """Yield one record per line of the gzip-compressed file at ``path``.

    Every line is evaluated as a Python expression (here: a dict literal).
    The file is closed as soon as the generator is exhausted or discarded.
    """
    with gzip.open(path, 'rb') as g:
        for line in g:
            # SECURITY(review): eval() executes arbitrary code. Only feed it
            # trusted dumps; prefer json.loads / ast.literal_eval if the file
            # format allows it.
            yield eval(line)

def get_dataframe(path, num_lines):
    """Load at most ``num_lines`` records from ``path`` into a DataFrame.

    Rows are indexed 0..n-1 in file order; the columns are the record keys.
    """
    records = {}
    for i, record in enumerate(parse(path)):
        # `>=` (not `>`) so that exactly num_lines rows are kept.
        if i >= num_lines:
            break
        records[i] = record

    return pd.DataFrame.from_dict(records, orient='index')

MAX_ITEMS_PER_CATEGORY = 250000

# Reuse the cached, already shuffled dataset when a previous run stored it.
try:
    data = pd.read_pickle('pickleddata.pkl')
except Exception:
    # Missing or unreadable cache: rebuild it below. Unlike a bare `except:`,
    # this still lets KeyboardInterrupt / SystemExit propagate.
    data = None
if data is None:
    frames = []
    for index, category in enumerate(categories):
        df = get_dataframe("{}/{}{}{}".format(folder, prefix, category, suffix), MAX_ITEMS_PER_CATEGORY)
        # Each review's summary is prepended to the main review text.
        # The label is the category's position in `categories`, stored as a
        # float as before (the old empty float frame made 'Y' float64).
        df = pd.DataFrame(data={'X': (df['summary'] + ' | ' + df['reviewText'])[:MAX_ITEMS_PER_CATEGORY],
                                'Y': float(index)})
        frames.append(df)
        print('{}:{} reviews'.format(category, len(df)))

    # One concat instead of repeated DataFrame.append (quadratic, and removed
    # in pandas 2.x).
    data = pd.concat(frames) if frames else pd.DataFrame(data={'X': [], 'Y': []})

    # Shuffle the samples
    data = data.sample(frac=1)
    data.reset_index(drop=True, inplace=True)
    # Saving the data in a pickled file
    pd.to_pickle(data, 'pickleddata.pkl')

## Visualize Data

In [29]:
data.head()

Unnamed: 0,X,Y
0,Top red dot | Typical of Trijicon's high end p...,5.0
1,Viva Las Vegas - Blu-ray Info | Version: U.S.A...,3.0
2,"nice color and quality! | Hubby loves this, gr...",6.0
3,Great Album! | I had bought this album when it...,2.0
4,From No Dinner to King | One of Maurice Sendak...,1.0


## Gluon `Dataset` and `DataLoader`

In [30]:
from mxnet.gluon.data import ArrayDataset
from mxnet.gluon.data import DataLoader
import numpy as np
import multiprocessing

ALPHABET = list("abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+ =<>()[]{}") # The 69 characters as specified in the paper
ALPHABET_INDEX = {letter: index for index, letter in enumerate(ALPHABET)} # { a: 0, b: 1, etc}
FEATURE_LEN = 1014 # max-length in characters for one document
NUM_WORKERS = multiprocessing.cpu_count() # number of workers used in the data loading
BATCH_SIZE = 128 # number of documents per batch

def encode(text):
    """One-hot encode ``text`` character by character.

    Returns a float32 array of shape (len(ALPHABET), FEATURE_LEN). Column
    ``i`` holds a single 1 in the row of the i-th character, or stays all-zero
    when that character is not in ALPHABET. Text beyond FEATURE_LEN characters
    is ignored; shorter text leaves the remaining columns zero.

    The text is lower-cased first because ALPHABET only contains lower-case
    letters. The previous version computed a lower-cased copy but then
    iterated over the raw text, so upper-case letters were silently dropped.
    Characters are encoded in reading order, as before.

    NOTE(review): this changes the features of any text containing upper-case
    letters; parameters trained with the old encoding should be retrained or
    re-validated.
    """
    encoded = np.zeros([len(ALPHABET), FEATURE_LEN], dtype='float32')
    for position, letter in enumerate(text.lower()[:FEATURE_LEN]):
        row = ALPHABET_INDEX.get(letter)
        if row is not None:
            encoded[row, position] = 1
    return encoded

class AmazonDataSet(ArrayDataset):
    """ArrayDataset of (review text, label) pairs that encodes the text on access."""

    def __getitem__(self, idx):
        # Encoding happens lazily, per item, instead of up front for the whole corpus.
        documents = self._data[0]
        labels = self._data[1]
        return encode(documents[idx]), labels[idx]
    
split = 0.8
split_index = int(split*len(data))

# The first `split_index` rows are used for training, the rest for testing.
features, labels = data['X'].values, data['Y'].values
train_data_X, test_data_X = features[:split_index], features[split_index:]
train_data_Y, test_data_Y = labels[:split_index], labels[split_index:]

train_dataset = AmazonDataSet(train_data_X, train_data_Y)
test_dataset = AmazonDataSet(test_data_X, test_data_Y)

# Both loaders share the same settings.
loader_options = dict(shuffle=True, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, last_batch='discard')
train_dataloader = DataLoader(train_dataset, **loader_options)
test_dataloader = DataLoader(test_dataset, **loader_options)



## Create the Network

In [31]:
# Run on the GPU when MXNet can see one, otherwise fall back to the CPU so the
# notebook still runs (slowly) on machines without a GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
NUM_FILTERS = 256 # number of convolutional filters per convolutional layer
NUM_OUTPUTS = len(categories) # number of classes
FULLY_CONNECTED = 1024 # number of unit in the fully connected dense layer
DROPOUT_RATE = 0.5 # probability of node drop out
LEARNING_RATE = 0.01 # learning rate of the gradient
MOMENTUM = 0.9 # momentum of the gradient
WDECAY = 0.00001 # regularization term to limit size of weights

In [32]:
net = gluon.nn.HybridSequential()
with net.name_scope():
    # Six ReLU convolutions described as (kernel_size, followed_by_max_pool).
    conv_layout = [(7, True), (7, True), (3, False), (3, False), (3, False), (3, True)]
    for kernel_size, pooled in conv_layout:
        net.add(gluon.nn.Conv1D(channels=NUM_FILTERS, kernel_size=kernel_size, activation='relu'))
        if pooled:
            net.add(gluon.nn.MaxPool1D(pool_size=3, strides=3))
    net.add(gluon.nn.Flatten())
    # Two fully connected ReLU layers, each followed by dropout.
    for dense_block in range(2):
        net.add(gluon.nn.Dense(FULLY_CONNECTED, activation='relu'))
        net.add(gluon.nn.Dropout(DROPOUT_RATE))
    # Output layer: one unit per category.
    net.add(gluon.nn.Dense(NUM_OUTPUTS))


## Initialize Network Parameters

In [33]:
hybridize = True # compile the network for speed; in-depth debugging is then not possible
load_params = False # set to True to start from the stored pre-trained parameters

if not load_params:
    # Fresh start: Xavier initialisation of all parameters on `ctx`.
    net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
else:
    net.load_params('crepe_gluon_epoch6.params', ctx=ctx)

if hybridize:
    net.hybridize()

## Loss and Optimizer

In [34]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# SGD with momentum and weight decay, configured from the hyper-parameters above.
optimizer_settings = {'learning_rate': LEARNING_RATE, 'wd': WDECAY, 'momentum': MOMENTUM}
trainer = gluon.Trainer(net.collect_params(), 'sgd', optimizer_settings)

## Evaluation Metric

In [35]:
def evaluate_accuracy(data_iterator, net):
    """Return the accuracy of ``net`` over all batches of ``data_iterator``.

    Every batch is moved to the module-level ``ctx`` before the forward pass;
    the predicted class is the arg-max over axis 1 of the network output.
    A progress line is printed every 50 batches.
    """
    metric = mx.metric.Accuracy()
    for batch_index, (samples, label) in enumerate(data_iterator):
        samples = samples.as_in_context(ctx)
        label = label.as_in_context(ctx)
        prediction = nd.argmax(net(samples), axis=1)

        if batch_index % 50 == 0:
            print("Samples {}".format(batch_index*len(samples)))
        metric.update(preds=prediction, labels=label)
    # get() returns a (name, value) pair; only the value is of interest.
    _, accuracy = metric.get()
    return accuracy

## Training Loop

In [None]:
# Train for epochs start_epoch .. number_epochs-1 and evaluate on the test
# loader after every epoch.
start_epoch = 0
number_epochs = 6
# Weight of the newest batch in the exponential moving average of the loss.
smoothing_constant = .01
for e in range(start_epoch, number_epochs):
    for i, (review, label) in enumerate(train_dataloader):
        # Move the batch to the device the network lives on.
        review = review.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # Only the forward pass and the loss are recorded for differentiation.
        with autograd.record():
            output = net(review)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # The batch size is handed to the trainer together with the update.
        trainer.step(review.shape[0])
        
        # moving average of the loss
        # (restarts from the current batch loss at the first batch of every epoch)
        curr_loss = nd.mean(loss).asscalar()
        moving_loss = (curr_loss if (i == 0) 
                       else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)

        # Progress report every 50 batches.
        if (i%50 == 0):
            nd.waitall()
            print('Batch {}:{},{}'.format(i,curr_loss,moving_loss))

    test_accuracy = evaluate_accuracy(test_dataloader, net)
    #Save the model using the gluon params format
    # The file name carries the epoch and the test accuracy in percent,
    # truncated to two decimals.
    net.save_params('crepe_epoch_{}_test_acc_{}.params'.format(e,int(test_accuracy*10000)/100))
    print("Epoch %s. Loss: %s, Test_acc %s" % (e, moving_loss, test_accuracy))

Batch 0:1.9453378915786743,1.9453378915786743
Batch 50:1.9373124837875366,1.943315461356685
Batch 100:1.9081898927688599,1.932043050003778
Batch 150:1.8508967161178589,1.9103771596963577
Batch 200:1.9001646041870117,1.897644773367536
Batch 250:1.898385763168335,1.893431542959099
Batch 300:1.856143832206726,1.8850829157477298
Batch 350:1.8237965106964111,1.880243124750212
Batch 400:1.8509269952774048,1.8771633303411561
Batch 450:1.8601528406143188,1.8749653442173433
Batch 500:1.8604050874710083,1.8724235391005881
Batch 550:1.8590565919876099,1.8718379428710714
Batch 600:1.8669337034225464,1.8693746529364783
Batch 650:1.8383421897888184,1.8681572097294508
Batch 700:1.8637014627456665,1.8687037009062701
Batch 750:1.824965000152588,1.8625427316355179
Batch 800:1.878620982170105,1.8560348505611044
Batch 850:1.8122719526290894,1.8397081812034204
Batch 900:1.6698048114776611,1.8129354063315397
Batch 950:1.6810656785964966,1.7841103058811965
Batch 1000:1.6665819883346558,1.7381734343849182
Bat

Batch 8450:0.47985485196113586,0.4639866777456651
Batch 8500:0.5624803900718689,0.45303771419313277
Batch 8550:0.5631905794143677,0.4521973165878254
Batch 8600:0.4150260388851166,0.45121625314653013
Batch 8650:0.4588847756385803,0.45476297943191973
Batch 8700:0.5779296159744263,0.45991646032763606
Batch 8750:0.4074401557445526,0.4517936186658269
Batch 8800:0.3851879835128784,0.4411688010072596
Batch 8850:0.47334063053131104,0.4395268303203959
Batch 8900:0.4523012638092041,0.4448619439297806
Batch 8950:0.32093507051467896,0.4485343687681969
Batch 9000:0.40195515751838684,0.45140995653505167
Batch 9050:0.33158352971076965,0.455498355086895
Batch 9100:0.5638276934623718,0.44672403234999364
Batch 9150:0.47881022095680237,0.4455053929735152
Batch 9200:0.3218039274215698,0.43965251346516476
Batch 9250:0.4617277979850769,0.4399941538302246
Batch 9300:0.563423752784729,0.4386175812157087
Batch 9350:0.5277352333068848,0.43751017413045534
Batch 9400:0.34602174162864685,0.42652386636728107
Batch 



Epoch 0. Loss: 0.3821500806238869, Test_acc 0.8727982149603325
Batch 0:0.44937634468078613,0.44937634468078613
Batch 50:0.31381672620773315,0.4315528630290391
Batch 100:0.5040487051010132,0.41346983978772406
Batch 150:0.415811151266098,0.40096860353549435
Batch 200:0.45135796070098877,0.401574596858394
Batch 250:0.42332765460014343,0.4076664222203996
Batch 300:0.4372505843639374,0.3971096851432318
Batch 350:0.3577941060066223,0.3925226119331721
Batch 400:0.37929272651672363,0.3841495312693576
Batch 450:0.3248221278190613,0.38793305058742067
Batch 500:0.42874953150749207,0.3837092511906223
Batch 550:0.43109965324401855,0.37953058307811355
Batch 600:0.2747587263584137,0.37857862215626725
Batch 650:0.25236496329307556,0.3780003708708689
Batch 700:0.3362199366092682,0.38730630131123317
Batch 750:0.4469776749610901,0.3942396399399915
Batch 800:0.3710731565952301,0.38049545879708163
Batch 850:0.29595208168029785,0.3823626289580725
Batch 900:0.4009445011615753,0.3824267493705039
Batch 950:0.5

Batch 8200:0.27197757363319397,0.2938076417783157
Batch 8250:0.24859192967414856,0.29774192723110016
Batch 8300:0.4467812776565552,0.30583694723425997
Batch 8350:0.3217979371547699,0.3063755147864844
Batch 8400:0.3140723407268524,0.2989789244329611
Batch 8450:0.36514127254486084,0.30080671444435647
Batch 8500:0.2110457867383957,0.29816959150269706
Batch 8550:0.33561939001083374,0.30346683826666654
Batch 8600:0.20255406200885773,0.30072598634120373
Batch 8650:0.3557213842868805,0.3039880308379817
Batch 8700:0.38020241260528564,0.30217133912626193
Batch 8750:0.40684252977371216,0.30184438725076956
Batch 8800:0.28492385149002075,0.3097588546899951
Batch 8850:0.3415820598602295,0.31324004152512847
Batch 8900:0.2536807060241699,0.3080846198178656
Batch 8950:0.21727560460567474,0.3101559973648928
Batch 9000:0.27118274569511414,0.304700698515511
Batch 9050:0.38542383909225464,0.30189469605826175
Batch 9100:0.2650977373123169,0.2984420310759431
Batch 9150:0.2548796236515045,0.2952403091536996


Batch 5000:0.23960117995738983,0.26678315999411417
Batch 5050:0.18233877420425415,0.2739831385381471
Batch 5100:0.1879027932882309,0.27036730169424994
Batch 5150:0.31666481494903564,0.27053240645172383
Batch 5200:0.2565819323062897,0.27223647008471696
Batch 5250:0.22163169085979462,0.2725995246321426
Batch 5300:0.3513615131378174,0.27252098415952264
Batch 5350:0.24110162258148193,0.27293656217397055
Batch 5400:0.36442163586616516,0.2762353949269341
Batch 5450:0.3137240707874298,0.27233790427669075
Batch 5500:0.2466142177581787,0.2702187237109611
Batch 5550:0.321842759847641,0.27121981425054176
Batch 5600:0.22286559641361237,0.27569431176025777
Batch 5650:0.270404577255249,0.27382083377147004
Batch 5700:0.30897971987724304,0.27785335870446204
Batch 5750:0.2224341183900833,0.27699410779504174
Batch 5800:0.18850266933441162,0.2715292185845686
Batch 5850:0.20904423296451569,0.2746270982438781
Batch 5900:0.19491109251976013,0.2654337835249431
Batch 5950:0.32512998580932617,0.269348799461956

Batch 1800:0.25570106506347656,0.25498207486616475
Batch 1850:0.22478832304477692,0.249041745170119
Batch 1900:0.3306662142276764,0.2519333822148582
Batch 1950:0.2329009473323822,0.25270139868403363
Batch 2000:0.19527964293956757,0.24814587946332373
Batch 2050:0.3025817275047302,0.2546912552456034
Batch 2100:0.1626625657081604,0.2540225265418582
Batch 2150:0.1746119260787964,0.25465562180846796
Batch 2200:0.19127564132213593,0.25329472867984537
Batch 2250:0.2373402863740921,0.2583443649528398
Batch 2300:0.18735381960868835,0.24979576143362756
Batch 2350:0.15668439865112305,0.2507421750142421
Batch 2400:0.3740532696247101,0.25605786834900135
Batch 2450:0.2778483033180237,0.2550493547884093
Batch 2500:0.20838014781475067,0.24735780700297047
Batch 2550:0.2590154707431793,0.25013863184484303
Batch 2600:0.2852047383785248,0.24776964599576956
Batch 2650:0.29664602875709534,0.2518563234850922
Batch 2700:0.22017504274845123,0.24788587227752631
Batch 2750:0.203706294298172,0.24617865405502198
B

Batch 1600:0.20007918775081635,0.2183053314267211
Batch 1650:0.18667852878570557,0.21653213823534814
Batch 1700:0.11753082275390625,0.21446947017305065
Batch 1750:0.24922433495521545,0.21774810693336796
Batch 1800:0.24191629886627197,0.21641898640458918
Batch 1850:0.14448727667331696,0.2175660210506627
Batch 1900:0.1582629382610321,0.21565621060041806
Batch 1950:0.17554453015327454,0.21167943307307924
Batch 2000:0.2131025195121765,0.21584835668911015
Batch 2050:0.28586122393608093,0.21716183769788053
Batch 2100:0.1828976571559906,0.21639496853454113
Batch 2150:0.19319458305835724,0.22190682061886674
Batch 2200:0.23728948831558228,0.21307741505647085
Batch 2250:0.19557172060012817,0.21317052838109618
Batch 2300:0.21660135686397552,0.21429414246255749
Batch 2350:0.18677692115306854,0.21088851094425246
Batch 2400:0.2642718255519867,0.21098130044964358
Batch 2450:0.14746440947055817,0.21967465643049044
Batch 2500:0.2861218750476837,0.2259039073392661
Batch 2550:0.14257875084877014,0.225796

Batch 9750:0.12800079584121704,0.22505619937549612
Batch 9800:0.22445355355739594,0.2225041026835433
Batch 9850:0.11865705251693726,0.21613074270182328
Batch 9900:0.14949725568294525,0.21989401321667415
Batch 9950:0.11306945979595184,0.22114477012527084
Batch 10000:0.22535301744937897,0.21986543788699028
Batch 10050:0.18214204907417297,0.22155672924752018
Batch 10100:0.1939678192138672,0.21912238921846042
Batch 10150:0.15101201832294464,0.21705893319549227
Batch 10200:0.15143752098083496,0.21419394537313646
Batch 10250:0.23791716992855072,0.21553134957794703
Batch 10300:0.18746180832386017,0.21716193222487412
Batch 10350:0.32769423723220825,0.21999840041081775
Batch 10400:0.23574091494083405,0.21954666997653496
Batch 10450:0.10186687856912613,0.21609920822354664
Batch 10500:0.09689061343669891,0.21348700857758054
Batch 10550:0.18735343217849731,0.20635782194454322
Samples 0
Samples 6400
Samples 12800
Samples 19200
Samples 25600
Samples 32000
Samples 38400
Samples 44800
Samples 51200


## Test with example review

In [1]:
review_title = "Good stuff"
review = "This album is definitely better than the previous one"

In [None]:
print(review_title)
print(review + '\n')
# The training samples were built as "<summary> | <reviewText>", so the title
# has to come first here as well (it used to be appended after the review).
encoded = nd.array([encode(review_title + " | " + review)], ctx=ctx)
output = net(encoded)
# Class probabilities of the single sample in the batch.
softmax = nd.softmax(output)
predicted = categories[np.argmax(output[0].asnumpy())]
print('Predicted: {}\n'.format(predicted))
for i, val in enumerate(categories):
    # asscalar() yields a plain Python number; the probability is printed in
    # percent, truncated to one decimal.
    print(val, float(int(softmax[0][i].asscalar()*1000)/10), '%')