In [37]:
import os
import keras
import keras.datasets.imdb as imdb
from tensorflow.keras.models import load_model

import json,re
import numpy as np

import fidle

In [38]:
vocab_size           = 10000
review_len           = 256

saved_models         = 'C:/models_embeding'

In [None]:
#Commaitaire dont on veut prédire le sentiment
reviews = [ "This film is particularly nice, a must see.",
             "This film is a great classic that cannot be ignored.",
             "I don't remember ever having seen such a movie...",
             "This movie is just abominable and doesn't deserve to be seen!"]

In [40]:
start_char = 1      # Start of a sequence (padding is 0)
oov_char   = 2      # Out-of-vocabulary
index_from = 3      # First word id

# ---- Retrieve dictionary {word:index}, and encode it in ascii
#      Shift the dictionary from +3
#      Add <pad>, <start> and <unknown> tags
#      Create a reverse dictionary : {index:word}
#
word_index = imdb.get_word_index()
word_index = {w:(i+index_from) for w,i in word_index.items()}
word_index.update( {'<pad>':0, '<start>':1, '<unknown>':2, '<undef>':3,} )
index_word = {index:word for word,index in word_index.items()} 

# ---- A nice function to transpose :
#
def dataset2text(review):
    return ' '.join([index_word.get(i, '?') for i in review])

### Nétoyage (on enlève les points), indexation et padding

### 1 is 'start' and 2 is 'unknow'

In [None]:
start_char = 1      # Start of a sequence (padding is 0)
oov_char   = 2      # Out-of-vocabulary
index_from = 3      # First word id

nb_reviews = len(reviews)
x_data     = []

# ---- For all reviews
for review in reviews:
    print('Words are : ', end='')
    # ---- First index must be <start>
    index_review=[start_char]
    print(f'{start_char} ', end='')
    # ---- For all words
    for w in review.split(' '):
        # ---- Clean it
        w_clean = re.sub(r"[^a-zA-Z0-9]", "", w)#Suppression des caractères alpha numéric
        # ---- Not empty ?
        if len(w_clean)>0:
            # ---- Get the index - must be inside dict or is out of vocab (oov)
            w_index = word_index.get(w, oov_char)
            if w_index>vocab_size : w_index=oov_char
            # ---- Add the index if < vocab_size
            index_review.append(w_index)
            print(f'{w_index} ', end='')
    # ---- Add the indexed review
    x_data.append(index_review)
    print()

# ---- Padding
x_data = keras.preprocessing.sequence.pad_sequences(x_data, value   = 0, padding = 'post', maxlen  = review_len)

Words are : 1 2 22 9 572 2 6 215 2 
Words are : 1 2 22 9 6 87 356 15 566 30 2 
Words are : 1 2 92 377 126 260 110 141 6 2 
Words are : 1 2 20 9 43 2 5 152 1833 8 30 2 


In [42]:
def translate(x):
    return ' '.join( [index_word.get(i,'?') for i in x] )

for i in range(nb_reviews):
    imax=np.where(x_data[i]==0)[0][0]+5
    print(f'\nText review {i}  :',    reviews[i])
    print(f'tokens vector  :', list(x_data[i][:imax]), '(...)')
    print('Translation    :', translate(x_data[i][:imax]), '(...)')


Text review 0  : This film is particularly nice, a must see.
tokens vector  : [1, 2, 22, 9, 572, 2, 6, 215, 2, 0, 0, 0, 0, 0] (...)
Translation    : <start> <unknown> film is particularly <unknown> a must <unknown> <pad> <pad> <pad> <pad> <pad> (...)

Text review 1  : This film is a great classic that cannot be ignored.
tokens vector  : [1, 2, 22, 9, 6, 87, 356, 15, 566, 30, 2, 0, 0, 0, 0, 0] (...)
Translation    : <start> <unknown> film is a great classic that cannot be <unknown> <pad> <pad> <pad> <pad> <pad> (...)

Text review 2  : I don't remember ever having seen such a movie...
tokens vector  : [1, 2, 92, 377, 126, 260, 110, 141, 6, 2, 0, 0, 0, 0, 0] (...)
Translation    : <start> <unknown> don't remember ever having seen such a <unknown> <pad> <pad> <pad> <pad> <pad> (...)

Text review 3  : This movie is just abominable and doesn't deserve to be seen!
tokens vector  : [1, 2, 20, 9, 43, 2, 5, 152, 1833, 8, 30, 2, 0, 0, 0, 0, 0] (...)
Translation    : <start> <unknown> movie is ju

### Appel du modèle et prévision

In [43]:
#Chargement du modèle
model = keras.models.load_model('models_embeding/best_model.keras')

#Prédiction
y_pred   = model.predict(x_data, verbose=0)



for i,review in enumerate(reviews):
    rate    = y_pred[i][0]
    opinion =  'NEGATIVE :-(' if rate<0.5 else 'POSITIVE :-)'    
    print(f'{review:<70} => {rate:.2f} - {opinion}')

This film is particularly nice, a must see.                            => 0.54 - POSITIVE :-)
This film is a great classic that cannot be ignored.                   => 0.73 - POSITIVE :-)
I don't remember ever having seen such a movie...                      => 0.54 - POSITIVE :-)
This movie is just abominable and doesn't deserve to be seen!          => 0.34 - NEGATIVE :-(
