**CREATING AN ARTIFICIAL NEURAL NETWORK (ANN) WITH AN EMBEDDING LAYER TO CLASSIFY THE FOLLOWING RESTAURANT REVIEWS**

'Never coming back!',

'horrible service',

'rude waitress',

'cold food',

'horrible food!',

'awesome',

'awesome services!',

'rocks',

'poor work',

'couldn't have done better'

Reviews 1, 2, 3, 4, 5 and 9 are negative statements, and reviews 6, 7, 8 and 10 are positive statements (0 for the negative class and 1 for the positive class).



In [1]:
## IMPORT NECESSARY LIBRARIES
import numpy as np
from numpy import array

from keras.layers import Embedding, Dense, Flatten
from keras.metrics import BinaryAccuracy
from keras.models import Sequential
from keras.utils import set_random_seed
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
## DEFINING THE DATA
# Ten short restaurant reviews; their order matters because the target
# labels defined later are matched to them by position.
reviews = [
    "Never coming back!",
    "horrible service",
    "rude waitress",
    "cold food",
    "horrible food!",
    "awesome",
    "awesome services!",
    "rocks",
    "poor work",
    "couldn't have done better",
]

In [3]:
## DATA PREPARATION: TOKENIZE THE REVIEWS AND ENCODE EVERY WORD AS AN INTEGER

# Learn the word -> integer index from the review corpus.
t = Tokenizer()
t.fit_on_texts(texts=reviews)

# Turn each review into the list of indices of its words.
encoded_docs = t.texts_to_sequences(texts=reviews)
encoded_docs

[[4, 5, 6],
 [1, 7],
 [8, 9],
 [10, 2],
 [1, 2],
 [3],
 [3, 11],
 [12],
 [13, 14],
 [15, 16, 17, 18]]

In [4]:
## PADDING THE ENCODED REVIEWS SO THAT ALL OF THEM HAVE THE SAME NUMBER OF ELEMENTS
# Take the padded length from the longest encoded review instead of hard-coding it:
# pad_sequences truncates sequences longer than maxlen, so a stale constant would
# silently drop words as soon as a longer review is added to the corpus.
max_length = max(len(doc) for doc in encoded_docs)
# padding = 'post' places the filler zeros after the word indices.
padded_reviews = pad_sequences(encoded_docs, maxlen = max_length, padding = 'post')
padded_reviews

array([[ 4,  5,  6,  0],
       [ 1,  7,  0,  0],
       [ 8,  9,  0,  0],
       [10,  2,  0,  0],
       [ 1,  2,  0,  0],
       [ 3,  0,  0,  0],
       [ 3, 11,  0,  0],
       [12,  0,  0,  0],
       [13, 14,  0,  0],
       [15, 16, 17, 18]], dtype=int32)

In [5]:
## DEFINING THE TARGET CLASSES
# One label per review, in corpus order: 0 = negative, 1 = positive.
y = array([
    0,  # Never coming back!
    0,  # horrible service
    0,  # rude waitress
    0,  # cold food
    0,  # horrible food!
    1,  # awesome
    1,  # awesome services!
    1,  # rocks
    0,  # poor work
    1,  # couldn't have done better
])

In [6]:
## NUMBER OF DISTINCT WORDS INDEXED BY THE FITTED TOKENIZER
len(t.word_index)

18

In [7]:
## CREATION OF THE ANN USING AN EMBEDDING LAYER
# Fix the random seed before any layer is created so that weight initialisation
# (and therefore training and the predictions below) is repeatable across runs.
set_random_seed(42)

model = Sequential()
# The encoded reviews use word indices starting at 1 and the padding uses 0, so the
# embedding table needs one more row than there are words. Deriving the size from the
# tokenizer keeps it correct if the corpus changes.
vocabulary_size = len(t.word_index) + 1
# Each of the max_length positions of a padded review is mapped to a 10-dimensional vector.
emb_layer = Embedding(input_dim = vocabulary_size, output_dim = 10, input_length = max_length)
model.add(emb_layer)
# Flatten the (max_length, 10) embedding output into one vector for the dense layers.
model.add(Flatten())
model.add(Dense(16, activation = 'relu'))
# Single sigmoid unit: the output is the predicted probability of the positive class.
model.add(Dense(1, activation = 'sigmoid'))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 4, 10)             190       
                                                                 
 flatten (Flatten)           (None, 40)                0         
                                                                 
 dense (Dense)               (None, 16)                656       
                                                                 
 dense_1 (Dense)             (None, 1)                 17        
                                                                 
Total params: 863 (3.37 KB)
Trainable params: 863 (3.37 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
## MODEL COMPILATION
# Binary cross-entropy pairs with the single sigmoid output unit;
# accuracy is reported during training under the name 'acc'.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [9]:
## TRAINING THE NETWORK
# Ten epochs over the padded reviews; the History object returned by fit
# is the last expression, so it is shown as the cell output.
model.fit(x=padded_reviews, y=y, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7fbcbc0ae680>

In [10]:
## IN-SAMPLE PREDICTION AND EVALUATION
# Round the sigmoid outputs for the training reviews to hard 0/1 class labels.
y_pr = model.predict(padded_reviews).round()
print(y_pr)

[[0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]]


In [11]:
## BINARY ACCURACY OF THE MODEL ON THE REVIEWS IT WAS TRAINED ON
bac = BinaryAccuracy()
# Accumulate the true labels against the predicted probabilities, then read the metric.
bac.update_state(y, model.predict(padded_reviews))
accuracy = bac.result().numpy()
print('Binary Accuracy:', accuracy)

Binary Accuracy: 0.7


In [12]:
## OUT-OF-SAMPLE PREDICTION (PREDICTING THE CLASS OF THE SENTENCE : 'cold horrible')
# Encode the new sentence with the fitted tokenizer and pad it exactly like the
# training reviews, rather than typing the word indices by hand; hand-typed
# indices would silently go stale if the corpus (and thus the word index) changed.
test = pad_sequences(t.texts_to_sequences(['cold horrible']), maxlen = max_length, padding = 'post')
print(np.round(model.predict(test)))

[[0.]]
