In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow_datasets as tfds

ModuleNotFoundError: No module named 'tensorflow.keras'

In [None]:
# Load the IMDB Reviews dataset
imdb, info = tfds.load("imdb_reviews", with_info=True, as_supervised=True)

In [None]:
train_data , test_data = imdb['train'] , imdb['test']
# Creating the training dataset for text and labels
train_sentences = []
train_labels = []
# Creating the validation set for text and labels
test_sentences = []
test_labels = []

for s,l in imdb['train']:
    train_sentences.append(s.numpy().decode("utf-8"))
    train_labels.append(l.numpy())
for s,l in imdb['test']:
    test_sentences.append(s.numpy().decode("utf-8"))
    test_labels.append(l.numpy())

train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
print(f"Total elements in the train set : {len(imdb['train'])}")
print(f"Total elements in the test set : {len(imdb['test'])}")
print(f"Total no.of sentences in the train set : {len(train_sentences)}")
print(f"Total no.of sentences in the test set : {len(test_sentences)}")

In [53]:
# parameters for the tokenzier 
vocab_size = 10000
embedding_dim = 16
max_length = 120
oov_token = '<OOV>'
trunc_type = 'post'

In [72]:
# Tokenization of the training sentences 

tokenizer = Tokenizer(num_words = vocab_size , oov_token = oov_token)
tokenizer.fit_on_texts(train_sentences)
word_indx = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(train_sentences)
padding = pad_sequences(sequences,maxlen = max_length,padding = trunc_type)
print(f"Total no.of words as dictionary : {len(word_indx)}")
# Printing of first example from the training set
print(f"original : {train_sentences[123]}")
print(f"sequences : {sequences[0]}")
print(f"padding : {padding[0]}")
print(f"label : {train_labels[0]}")

Total no.of words as dictionary : 88583
original : "And All Through the House" is a special crypt episode not only because it's from the first season, but this episode was the first one I saw! I remember as a young man being on vacation with my parents that summer in 1989 in our hotel room in South Carolina on HBO I saw this episode and I was buried to the Crypt right then and forever! I had always been a fan of horror-suspense series and liked monster movies, and with this series started by HBO I again had fearful pleasure. This episode being the first one I saw is memorable for me and one of my favorites, it's just so enjoyable with a nice twist. "And All Through the House" has a nice cozy setting on a snowy Christmas Eve, which is a perfect way to get you relaxed for holiday chopping! Well anyway you have Mary Ellen Trainor(who by the way plays in several warner brothers works, usually small parts) as a greedy philandering wife who takes care of her hubby while waiting on some money

In [58]:
# Tokenizing for the test set
sentences = tokenizer.texts_to_sequences(test_sentences)
test_sentences_pad = pad_sequences(sentences,maxlen = max_length,truncating = trunc_type)
print(f"original test sentences : {test_sentences[0]}")
print(f"sequenced test sentences : {sentences[0]}")
print(f"padded test sentences : {test_sentences_pad[0]}")
print(f"label : {test_labels[0]}")

original test sentences : There are films that make careers. For George Romero, it was NIGHT OF THE LIVING DEAD; for Kevin Smith, CLERKS; for Robert Rodriguez, EL MARIACHI. Add to that list Onur Tukel's absolutely amazing DING-A-LING-LESS. Flawless film-making, and as assured and as professional as any of the aforementioned movies. I haven't laughed this hard since I saw THE FULL MONTY. (And, even then, I don't think I laughed quite this hard... So to speak.) Tukel's talent is considerable: DING-A-LING-LESS is so chock full of double entendres that one would have to sit down with a copy of this script and do a line-by-line examination of it to fully appreciate the, uh, breadth and width of it. Every shot is beautifully composed (a clear sign of a sure-handed director), and the performances all around are solid (there's none of the over-the-top scenery chewing one might've expected from a film like this). DING-A-LING-LESS is a film whose time has come.
sequenced test sentences : [48, 24

In [44]:
# Model architecture 

model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(vocab_size , embedding_dim , input_length = max_length),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(6,activation = 'relu'),
    tf.keras.layers.Dense(1,activation = 'softmax')
    
])

In [61]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 120, 16)           160000    
                                                                 
 flatten_1 (Flatten)         (None, 1920)              0         
                                                                 
 dense_2 (Dense)             (None, 6)                 11526     
                                                                 
 dense_3 (Dense)             (None, 1)                 7         
                                                                 
Total params: 171533 (670.05 KB)
Trainable params: 171533 (670.05 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [45]:
model.compile(loss = 'binary_crossentropy',
             optimizer = 'adam',
             metrics = ['accuracy'])

In [60]:
model.fit(padding,train_labels,validation_data = (test_sentences_pad,test_labels),
         epochs = 10,
         verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1d4a2ccb370>