In [31]:
#importing required libraries
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense

In [33]:
# Sentences (Text Data)
sentences = [
    "The sky is clear and blue today",
    "I absolutely love the new recipe I tried",
    "This movie exceeded all my expectations",
    "He forgot the keys again, which is frustrating",
    "We achieved our sales target this quarter",
    "The weather forecast predicts heavy rainfall",
    "She solved the puzzle faster than anyone expected",
    "The dinner party was a complete disaster",
    "I found an amazing book at the library",
    "Our team is preparing for the championship finals",
    "The traffic today was unbelievably bad",
    "She sang the song beautifully at the event"
]

# Binary Labels (1 for Positive, 0 for Negative)
label = [1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1]

In [34]:
# Initialize the Tokenizer and fit on the sentences
tokens = Tokenizer(oov_token = 'oov')
tokens.fit_on_texts(sentences)

# Get the vocabulary (word-index mapping)
vocab = tokens.word_index
vocab

{'oov': 1,
 'the': 2,
 'is': 3,
 'i': 4,
 'today': 5,
 'this': 6,
 'our': 7,
 'she': 8,
 'was': 9,
 'at': 10,
 'sky': 11,
 'clear': 12,
 'and': 13,
 'blue': 14,
 'absolutely': 15,
 'love': 16,
 'new': 17,
 'recipe': 18,
 'tried': 19,
 'movie': 20,
 'exceeded': 21,
 'all': 22,
 'my': 23,
 'expectations': 24,
 'he': 25,
 'forgot': 26,
 'keys': 27,
 'again': 28,
 'which': 29,
 'frustrating': 30,
 'we': 31,
 'achieved': 32,
 'sales': 33,
 'target': 34,
 'quarter': 35,
 'weather': 36,
 'forecast': 37,
 'predicts': 38,
 'heavy': 39,
 'rainfall': 40,
 'solved': 41,
 'puzzle': 42,
 'faster': 43,
 'than': 44,
 'anyone': 45,
 'expected': 46,
 'dinner': 47,
 'party': 48,
 'a': 49,
 'complete': 50,
 'disaster': 51,
 'found': 52,
 'an': 53,
 'amazing': 54,
 'book': 55,
 'library': 56,
 'team': 57,
 'preparing': 58,
 'for': 59,
 'championship': 60,
 'finals': 61,
 'traffic': 62,
 'unbelievably': 63,
 'bad': 64,
 'sang': 65,
 'song': 66,
 'beautifully': 67,
 'event': 68}

In [35]:
# Convert sentences into sequences of integers
sequence = tokens.texts_to_sequences(sentences)
sequence

[[2, 11, 3, 12, 13, 14, 5],
 [4, 15, 16, 2, 17, 18, 4, 19],
 [6, 20, 21, 22, 23, 24],
 [25, 26, 2, 27, 28, 29, 3, 30],
 [31, 32, 7, 33, 34, 6, 35],
 [2, 36, 37, 38, 39, 40],
 [8, 41, 2, 42, 43, 44, 45, 46],
 [2, 47, 48, 9, 49, 50, 51],
 [4, 52, 53, 54, 55, 10, 2, 56],
 [7, 57, 3, 58, 59, 2, 60, 61],
 [2, 62, 5, 9, 63, 64],
 [8, 65, 2, 66, 67, 10, 2, 68]]

In [38]:
# Pad sequences to make them of equal length
sequences = pad_sequences(sequence, maxlen=6, padding='post', truncating='post')
print("Padded Sequences:", sequences)
print("Shape of Sequences:", sequences.shape)

# Reshape labels into a NumPy array
label = np.array(label).reshape(-1)
print("Labels Shape:", label.shape)

Padded Sequences: [[ 2 11  3 12 13 14]
 [ 4 15 16  2 17 18]
 [ 6 20 21 22 23 24]
 [25 26  2 27 28 29]
 [31 32  7 33 34  6]
 [ 2 36 37 38 39 40]
 [ 8 41  2 42 43 44]
 [ 2 47 48  9 49 50]
 [ 4 52 53 54 55 10]
 [ 7 57  3 58 59  2]
 [ 2 62  5  9 63 64]
 [ 8 65  2 66 67 10]]
Shape of Sequences: (12, 6)
Labels Shape: (12,)


In [43]:
# Define the model
model = Sequential()
model.add(SimpleRNN(1, input_shape=(sequences.shape[1],1), activation = 'relu' ))
model.add(Dense(1, activation = 'sigmoid'))

# Compile the model
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

In [44]:
# Train the model
model.fit(sequences, label, epochs=2)

Epoch 1/2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.3333 - loss: 0.6931
Epoch 2/2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.6667 - loss: 0.6930


<keras.src.callbacks.history.History at 0x7851ee1e9960>

In [48]:
# New sentence to test
test = ["This was the best day yesterday"]

# Tokenize and pad the test sentence
test = tokens.texts_to_sequences(test)
test = pad_sequences(test, padding = 'post', truncating = 'post', maxlen = 6)
test
test.shape

#Predict the label
model.predict(test)
tokens.word_index

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


{'oov': 1,
 'the': 2,
 'is': 3,
 'i': 4,
 'this': 5,
 'was': 6,
 'today': 7,
 'our': 8,
 'she': 9,
 'at': 10,
 'sky': 11,
 'clear': 12,
 'and': 13,
 'blue': 14,
 'absolutely': 15,
 'love': 16,
 'new': 17,
 'recipe': 18,
 'tried': 19,
 'movie': 20,
 'exceeded': 21,
 'all': 22,
 'my': 23,
 'expectations': 24,
 'he': 25,
 'forgot': 26,
 'keys': 27,
 'again': 28,
 'which': 29,
 'frustrating': 30,
 'we': 31,
 'achieved': 32,
 'sales': 33,
 'target': 34,
 'quarter': 35,
 'weather': 36,
 'forecast': 37,
 'predicts': 38,
 'heavy': 39,
 'rainfall': 40,
 'solved': 41,
 'puzzle': 42,
 'faster': 43,
 'than': 44,
 'anyone': 45,
 'expected': 46,
 'dinner': 47,
 'party': 48,
 'a': 49,
 'complete': 50,
 'disaster': 51,
 'found': 52,
 'an': 53,
 'amazing': 54,
 'book': 55,
 'library': 56,
 'team': 57,
 'preparing': 58,
 'for': 59,
 'championship': 60,
 'finals': 61,
 'traffic': 62,
 'unbelievably': 63,
 'bad': 64,
 'sang': 65,
 'song': 66,
 'beautifully': 67,
 'event': 68,
 'best': 69,
 'day': 70,
 'ye