# Imports

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
##run

In [2]:
import numpy as np 
import pandas as pd
##run

In [3]:
import pickle

In [8]:
with open('tokenizer.pickle', 'rb') as handle:
    tokenizer = pickle.load(handle)
vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
#run

In [8]:
data = pd.read_csv(r"Dataset/yelp_labelled.txt", delimiter='\t', header=None)
data2 = pd.read_csv(r"Dataset/imdb_labelled.txt", delimiter='\t', header=None)
data3 = pd.read_csv(r"Dataset/amazon_cells_labelled.txt", delimiter='\t', header=None)

data.columns = ["Text", "Label"]
data2.columns = ["Text", "Label"]
data3.columns = ["Text", "Label"]

df = data.append(data2, ignore_index=True)
df = df.append(data3, ignore_index=True)

print(df)

#run

                                                   Text  Label
0                              Wow... Loved this place.      1
1                                    Crust is not good.      0
2             Not tasty and the texture was just nasty.      0
3     Stopped by during the late May bank holiday of...      1
4     The selection on the menu was great and so wer...      1
...                                                 ...    ...
2743  The screen does get smudged easily because it ...      0
2744  What a piece of junk.. I lose more calls on th...      0
2745                       Item Does Not Match Picture.      0
2746  The only thing that disappoint me is the infra...      0
2747  You can not answer calls with the unit, never ...      0

[2748 rows x 2 columns]


  df = data.append(data2, ignore_index=True)
  df = df.append(data3, ignore_index=True)


In [9]:
sentences = []
sentences = df.Text

labels = []
labels = df.Label
##run

In [10]:
vocab_size = 10000
embedding_dim = 16
max_length = 100
trunc_type='post'
padding_type='post'
oov_tok = "<OOV>"
training_size = round(len(df)*0.8)
##run

In [11]:
training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]
##run

In [12]:
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)

word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
##run

In [29]:
import numpy as np
training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

# Train Model

In [14]:
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 16)           160000    
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 24)                408       
                                                                 
 dense_1 (Dense)             (None, 1)                 25        
                                                                 
Total params: 160,433
Trainable params: 160,433
Non-trainable params: 0
_________________________________________________________________


In [16]:
num_epochs = 30
history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=2)

Epoch 1/30
69/69 - 1s - loss: 0.6932 - accuracy: 0.5000 - val_loss: 0.6934 - val_accuracy: 0.4782 - 829ms/epoch - 12ms/step
Epoch 2/30
69/69 - 0s - loss: 0.6919 - accuracy: 0.5109 - val_loss: 0.6927 - val_accuracy: 0.4782 - 169ms/epoch - 2ms/step
Epoch 3/30
69/69 - 0s - loss: 0.6888 - accuracy: 0.5414 - val_loss: 0.6898 - val_accuracy: 0.4800 - 148ms/epoch - 2ms/step
Epoch 4/30
69/69 - 0s - loss: 0.6797 - accuracy: 0.6160 - val_loss: 0.6824 - val_accuracy: 0.5418 - 195ms/epoch - 3ms/step
Epoch 5/30
69/69 - 0s - loss: 0.6579 - accuracy: 0.7225 - val_loss: 0.6659 - val_accuracy: 0.6091 - 138ms/epoch - 2ms/step
Epoch 6/30
69/69 - 0s - loss: 0.6172 - accuracy: 0.8062 - val_loss: 0.6415 - val_accuracy: 0.6218 - 145ms/epoch - 2ms/step
Epoch 7/30
69/69 - 0s - loss: 0.5587 - accuracy: 0.8535 - val_loss: 0.5972 - val_accuracy: 0.7709 - 165ms/epoch - 2ms/step
Epoch 8/30
69/69 - 0s - loss: 0.4919 - accuracy: 0.8649 - val_loss: 0.5636 - val_accuracy: 0.7655 - 161ms/epoch - 2ms/step
Epoch 9/30
69/6

In [17]:
e = model.layers[0]
weights = e.get_weights()[0]
print(weights.shape) # shape: (vocab_size, embedding_dim)

(10000, 16)


In [18]:
sentence = ["it is very good"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
print(model.predict(padded))

[[0.9403698]]


In [19]:
model.save(r'Model/my_model.h5')

# Run Model

In [5]:
from keras.models import load_model
model = load_model(r'Model/my_model.h5')
##run

In [16]:
sentence = ["It was not amusing","it is not complete","it needs some more refining"]
sequences = tokenizer.texts_to_sequences(sentence)
padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

x = 0
size = len(padded)

while True:
  y = model.predict(padded)[x]
  z = (sentence)[x]
  x = x+1 
  
  if(y>0.5):
    print(z)
    print("positive")
  else:
    print(z)
    print("negative")

  if(x==size):
    break
##run

It was not amusing
negative
it is not complete
negative
it needs some more refining
negative


In [24]:
import warnings
warnings.filterwarnings('ignore')

import ipywidgets as widgets
from IPython.display import display, clear_output

from __future__ import unicode_literals, print_function
from prompt_toolkit import print_formatted_text, HTML
#run

In [25]:
# sentence

sentence_process = widgets.Text(placeholder='Your sentence here')


# button 

button_send = widgets.Button(
                description='Process',
                tooltip='Send',
                style={'description_width': 'initial'}
            )

output = widgets.Output()

def on_button_clicked1(event):
    with output:
        clear_output()
        sente = sentence_process.value
        sentence = [sente]
        sequences = tokenizer.texts_to_sequences(sentence)
        padded = pad_sequences(sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

        x = 0
        size = len(padded)

        while True:
          y = model.predict(padded)[x]
          z = (sentence)[x]
          x = x+1 
          
          if(y>0.5):
            print("")
            print(z)
            print('\033[1m' + 'Positive' + '\033[0m')
          else:
            print("")
            print(z)
            print('\033[1m' + 'Negaive' + '\033[0m')
            
          if(x==size):
            break

button_send.on_click(on_button_clicked1)

vbox_result = widgets.VBox([button_send, output])
# stacked right hand side

text_0 = widgets.HTML(value="<h1>NLP Sentiment Detection</h1>")
text_1 = widgets.HTML(value="<h2>Sentece</h2>")

vbox_text = widgets.VBox([text_0, text_1, sentence_process, vbox_result])
page = widgets.HBox([vbox_text])
display(page)

HBox(children=(VBox(children=(HTML(value='<h1>NLP Sentiment Detection</h1>'), HTML(value='<h2>Sentece</h2>'), …

In [17]:
!pip freeze > requirements.txt

In [28]:
!jupyter nbextension enable --py widgetsnbextension --sys-prefix
!jupyter serverextension enable voila --sys-prefix

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: ok
Enabling: voila
- Writing config: F:\Anaconda\envs\nlp\etc\jupyter
    - Validating...
      voila 0.3.5 ok


In [33]:
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)