In [121]:
# Part 1: Custom Text Processing and Model
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.models import Sequential
from keras.layers import Embedding
from keras.utils import pad_sequences

# Toy corpus: ten short slogans used to demonstrate tokenisation, padding
# and an (untrained) embedding lookup.
docs = [
    'go india',
    'india india',
    'hip hip hurray',
    'jeetega bhai jeetega india jeetega',
    'bharat mata ki jai',
    'kohli kohli',
    'sachin sachin',
    'dhoni dhoni',
    'modi ji ki jai',
    'inquilab zindabad'
]

# Build the vocabulary from the corpus. Word ids start at 1, which leaves
# id 0 free to act as the padding value further down.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(docs)
print("Word Index:", tokenizer.word_index)  # Displaying the word index

# Replace every word by its integer id (one variable-length list per document).
sequences = tokenizer.texts_to_sequences(docs)
print("Sequences:", sequences)  # Displaying the sequences

# Right-pad with zeros so all rows share the length of the longest document.
sequences = pad_sequences(sequences, padding='post')
print("Padded Sequences:\n", sequences)

# +1 because id 0 (padding) also needs a row in the embedding table.
vocab_size = len(tokenizer.word_index) + 1

# A model consisting of a single Embedding layer: each token id is mapped to a
# 2-dimensional vector. `input_length` is intentionally not passed: it is
# deprecated in Keras 3, and the sequence length is already fixed by the
# explicit build() call below.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=2))
model.build(input_shape=(None, sequences.shape[1]))  # Explicitly building the model
model.summary()

# Keyword arguments on purpose: the second positional parameter of compile()
# is `loss`, so compile('adam', 'accuracy') would register 'accuracy' as the
# loss function instead of as a metric. predict() below does not need a loss.
model.compile(optimizer='adam', metrics=['accuracy'])

# The weights are still at their random initial values, so this prints the
# untrained embedding vector of every token: (documents, padded length, 2).
pred = model.predict(sequences)
print(pred)

# Part 2: IMDB Dataset Processing and Model
from keras.datasets import imdb
from keras.layers import SimpleRNN, Dense

# Hyper-parameters gathered in one place. VOCAB_SIZE must be identical for
# load_data(num_words=...) and Embedding(input_dim=...), and MAX_LEN must be
# identical for pad_sequences(maxlen=...) and model.build(input_shape=...).
VOCAB_SIZE = 10000  # number of most frequent words kept by load_data
MAX_LEN = 50        # tokens per review after padding / truncation
EMBED_DIM = 2       # size of each word vector
RNN_UNITS = 32      # hidden units of the SimpleRNN layer
EPOCHS = 5

# Reviews arrive already encoded as lists of integer word ids.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=VOCAB_SIZE)

# Force every review to exactly MAX_LEN tokens: shorter ones are zero-padded
# at the end (padding='post'); longer ones are truncated using the
# pad_sequences default (truncating='pre', i.e. the last MAX_LEN tokens are kept).
X_train = pad_sequences(X_train, padding='post', maxlen=MAX_LEN)
X_test = pad_sequences(X_test, padding='post', maxlen=MAX_LEN)
print("Train shape:", X_train.shape)

# Embedding -> SimpleRNN -> sigmoid unit for binary sentiment classification.
# `input_length` is intentionally not passed to Embedding: it is deprecated in
# Keras 3, and the sequence length is fixed by the explicit build() call.
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=EMBED_DIM))
model.add(SimpleRNN(RNN_UNITS))
model.add(Dense(1, activation='sigmoid'))
model.build(input_shape=(None, MAX_LEN))  # Explicitly building the model
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])

# NOTE(review): the test split doubles as validation data here, so the val_*
# numbers are not an independent estimate if they are used to choose the
# number of epochs or other settings. Consider validation_split on the
# training data and a final model.evaluate(X_test, y_test) instead.
history = model.fit(X_train, y_train, epochs=EPOCHS, validation_data=(X_test, y_test))


Word Index: {'india': 1, 'jeetega': 2, 'hip': 3, 'ki': 4, 'jai': 5, 'kohli': 6, 'sachin': 7, 'dhoni': 8, 'go': 9, 'hurray': 10, 'bhai': 11, 'bharat': 12, 'mata': 13, 'modi': 14, 'ji': 15, 'inquilab': 16, 'zindabad': 17}
Sequences: [[9, 1], [1, 1], [3, 3, 10], [2, 11, 2, 1, 2], [12, 13, 4, 5], [6, 6], [7, 7], [8, 8], [14, 15, 4, 5], [16, 17]]
Padded Sequences:
 [[ 9  1  0  0  0]
 [ 1  1  0  0  0]
 [ 3  3 10  0  0]
 [ 2 11  2  1  2]
 [12 13  4  5  0]
 [ 6  6  0  0  0]
 [ 7  7  0  0  0]
 [ 8  8  0  0  0]
 [14 15  4  5  0]
 [16 17  0  0  0]]




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
[[[-0.03476461  0.01432145]
  [ 0.02471406 -0.01253567]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]]

 [[ 0.02471406 -0.01253567]
  [ 0.02471406 -0.01253567]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]]

 [[-0.04652932  0.02746356]
  [-0.04652932  0.02746356]
  [ 0.04394301 -0.03799712]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]]

 [[-0.03311719 -0.0220902 ]
  [-0.00975322  0.02522   ]
  [-0.03311719 -0.0220902 ]
  [ 0.02471406 -0.01253567]
  [-0.03311719 -0.0220902 ]]

 [[-0.03101608 -0.02512293]
  [ 0.02379389  0.04374853]
  [ 0.00774734  0.03800324]
  [ 0.0036213   0.0418818 ]
  [-0.00481365 -0.00131522]]

 [[-0.04041983 -0.03929454]
  [-0.04041983 -0.03929454]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]
  [-0.00481365 -0.00131522]]

 [[-0.02991413 -0.04304239]
  [-0.02991413 -0.04304239]
  [-0.00481365 -0.00

Epoch 1/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 16ms/step - acc: 0.5178 - loss: 0.6910 - val_acc: 0.7621 - val_loss: 0.4916
Epoch 2/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 15ms/step - acc: 0.7930 - loss: 0.4521 - val_acc: 0.8076 - val_loss: 0.4214
Epoch 3/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 16ms/step - acc: 0.8618 - loss: 0.3323 - val_acc: 0.8080 - val_loss: 0.4451
Epoch 4/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 14ms/step - acc: 0.8900 - loss: 0.2781 - val_acc: 0.7947 - val_loss: 0.4532
Epoch 5/5
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - acc: 0.9132 - loss: 0.2353 - val_acc: 0.7977 - val_loss: 0.4821
