<a href="https://colab.research.google.com/github/miraqulass/MachineLearning/blob/MLPractices/RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**RECURRENT NEURAL NETWORKS**

**Natural Language Processing** -- A discipline in computing that deals with communication between natural(human) languages and computer languages.

**Recurrent Neural Network** -- The kind of Neural Network that processes sequential data such as texts or characters.

In [2]:
vocab = {}
word_encoding = 1

def bag_of_words(text):
  global word_encoding

  words = text.lower().split(" ")
  bag = {}

  for word in words:
    if word in vocab:
      encoding = vocab[word]
    else:
      vocab[word] = word_encoding
      encoding = word_encoding
      word_encoding += 1

    if encoding in bag:
      bag[encoding] += 1
    else:
      bag[encoding] = 1

  return bag

text = "Today I have done something constructive, but I must be consistent!"
bag = bag_of_words(text)
print(bag)
print(vocab)

{1: 1, 2: 2, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1}
{'today': 1, 'i': 2, 'have': 3, 'done': 4, 'something': 5, 'constructive,': 6, 'but': 7, 'must': 8, 'be': 9, 'consistent!': 10}


In [3]:
voc = {}
word_encoding = 1

def bag_of_wordss(text):
  global word_encoding

  words = text.lower().split(" ")
  bag = {}

  for word in words:
    if word in voc:
      encoding = voc[word]
    else:
      voc[word] = word_encoding
      encoding = word_encoding
      word_encoding += 1

    if encoding in bag:
      bag[encoding] += 1
    else:
      bag[encoding] = 1

  return bag

text = "The broad-based or bread-based government in Kenya is bullshit, should be disbanded! And dissolve the parliament too!"
bag = bag_of_wordss(text)

print(bag)
print(voc)

{1: 2, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 1, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1}
{'the': 1, 'broad-based': 2, 'or': 3, 'bread-based': 4, 'government': 5, 'in': 6, 'kenya': 7, 'is': 8, 'bullshit,': 9, 'should': 10, 'be': 11, 'disbanded!': 12, 'and': 13, 'dissolve': 14, 'parliament': 15, 'too!': 16}


In [30]:
from keras.datasets import imdb
from keras.preprocessing import sequence
import tensorflow as tf
import os
import numpy as np


VOCAB_SIZE = 88584

MAXLEN = 250
BATCH_SIZE = 64

(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = VOCAB_SIZE)

In [31]:
train_data[0]

[1,
 14,
 22,
 16,
 43,
 530,
 973,
 1622,
 1385,
 65,
 458,
 4468,
 66,
 3941,
 4,
 173,
 36,
 256,
 5,
 25,
 100,
 43,
 838,
 112,
 50,
 670,
 22665,
 9,
 35,
 480,
 284,
 5,
 150,
 4,
 172,
 112,
 167,
 21631,
 336,
 385,
 39,
 4,
 172,
 4536,
 1111,
 17,
 546,
 38,
 13,
 447,
 4,
 192,
 50,
 16,
 6,
 147,
 2025,
 19,
 14,
 22,
 4,
 1920,
 4613,
 469,
 4,
 22,
 71,
 87,
 12,
 16,
 43,
 530,
 38,
 76,
 15,
 13,
 1247,
 4,
 22,
 17,
 515,
 17,
 12,
 16,
 626,
 18,
 19193,
 5,
 62,
 386,
 12,
 8,
 316,
 8,
 106,
 5,
 4,
 2223,
 5244,
 16,
 480,
 66,
 3785,
 33,
 4,
 130,
 12,
 16,
 38,
 619,
 5,
 25,
 124,
 51,
 36,
 135,
 48,
 25,
 1415,
 33,
 6,
 22,
 12,
 215,
 28,
 77,
 52,
 5,
 14,
 407,
 16,
 82,
 10311,
 8,
 4,
 107,
 117,
 5952,
 15,
 256,
 4,
 31050,
 7,
 3766,
 5,
 723,
 36,
 71,
 43,
 530,
 476,
 26,
 400,
 317,
 46,
 7,
 4,
 12118,
 1029,
 13,
 104,
 88,
 4,
 381,
 15,
 297,
 98,
 32,
 2071,
 56,
 26,
 141,
 6,
 194,
 7486,
 18,
 4,
 226,
 22,
 21,
 134,
 476,
 26,
 480,
 5

In [32]:
print(max([max(sequence) for sequence in train_data]))

88583


In [33]:
len(train_data[0])

218

In [34]:
train_data = sequence.pad_sequences(train_data, MAXLEN)
test_data = sequence.pad_sequences(test_data, MAXLEN)

In [35]:
len(train_data[0])

250

In [36]:
# Creating the Model

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(VOCAB_SIZE, 32, input_length=MAXLEN),
    tf.keras.layers.LSTM(32),
    tf.keras.layers.Dense(1, activation='sigmoid')
])



In [37]:
model.summary()

In [39]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=['acc'])

history = model.fit(train_data, train_labels, epochs=10, validation_split=0.2)

Epoch 1/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - acc: 0.9766 - loss: 0.0771 - val_acc: 0.8814 - val_loss: 0.4315
Epoch 2/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 11ms/step - acc: 0.9853 - loss: 0.0483 - val_acc: 0.8782 - val_loss: 0.4557
Epoch 3/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 12ms/step - acc: 0.9914 - loss: 0.0310 - val_acc: 0.8766 - val_loss: 0.4279
Epoch 4/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - acc: 0.9924 - loss: 0.0256 - val_acc: 0.8740 - val_loss: 0.5135
Epoch 5/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 12ms/step - acc: 0.9962 - loss: 0.0156 - val_acc: 0.8516 - val_loss: 0.5012
Epoch 6/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 11ms/step - acc: 0.9963 - loss: 0.0139 - val_acc: 0.8560 - val_loss: 0.5876
Epoch 7/10
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 

In [40]:
results = model.evaluate(test_data, test_labels)
print(results)

[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - acc: 0.8456 - loss: 0.7220
[0.7181395292282104, 0.8465999960899353]
