<a href="https://colab.research.google.com/github/navi004/Deep-Learning-CSE4037-/blob/main/DL_Lab_8_2_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## LSTM and GRU fusion - Lab A-5b
* Naveen N (22MIA1049)  

In [12]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input,Embedding, LSTM, GRU, Dense, Dropout, Concatenate
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("/content/IMDB Dataset.csv")
print(df.head())

                                              review sentiment
0  One of the other reviewers has mentioned that ...  positive
1  A wonderful little production. <br /><br />The...  positive
2  I thought this was a wonderful way to spend ti...  positive
3  Basically there's a family where a little boy ...  negative
4  Petter Mattei's "Love in the Time of Money" is...  positive


In [3]:
t = Tokenizer(num_words=5000)

t.fit_on_texts(df['review'])

x = t.texts_to_sequences(df['review'])

max_length = 100

xp = pad_sequences(x, maxlen=max_length, padding = 'post')

In [4]:
xp

array([[ 954,  183,    1, ...,  125, 4103,  486],
       [  29,    1, 2294, ..., 1977,   69,  221],
       [1026,  220,  230, ...,   63,   16,  350],
       ...,
       [   9,  200,   26, ..., 1957,    2,    2],
       [  16,   47, 4834, ...,   67,  739,   42],
       [  49,   14,   47, ...,  794,   11,   17]], dtype=int32)

In [5]:
y = np.array(df['sentiment'].map({'positive' : 1, 'negative' : 0}))
y

array([1, 1, 1, ..., 0, 0, 0])

In [6]:
x_train, x_test, y_train, y_test = train_test_split(xp, y, test_size=0.2, random_state=42)

print(x_train.shape)

(40000, 100)


### LSTM and GRU fusion Model

In [14]:
# Input layer
input_layer = Input(shape=(max_length,))

# Embedding layer
embedding = Embedding(input_dim=5000, output_dim=64, input_length=max_length)(input_layer)

# LSTM branch
lstm_out = LSTM(64, activation='tanh', return_sequences=False)(embedding)

# GRU branch
gru_out = GRU(64, activation='tanh', return_sequences=False)(embedding)

# Combine LSTM and GRU outputs
merged = Concatenate()([lstm_out, gru_out])

# Fully connected layer
dense_out = Dense(32, activation='relu')(merged)
final_output = Dense(1, activation='sigmoid')(dense_out)

# Build and compile model
model = Model(inputs=input_layer, outputs=final_output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


Epoch 1/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 97ms/step - accuracy: 0.7533 - loss: 0.4897 - val_accuracy: 0.8621 - val_loss: 0.3253
Epoch 2/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 99ms/step - accuracy: 0.8737 - loss: 0.3068 - val_accuracy: 0.8580 - val_loss: 0.3340
Epoch 3/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 100ms/step - accuracy: 0.9039 - loss: 0.2414 - val_accuracy: 0.8742 - val_loss: 0.2960
Epoch 4/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 99ms/step - accuracy: 0.9284 - loss: 0.1848 - val_accuracy: 0.8741 - val_loss: 0.3188
Epoch 5/5
[1m1250/1250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 100ms/step - accuracy: 0.9473 - loss: 0.1421 - val_accuracy: 0.8683 - val_loss: 0.3660


<keras.src.callbacks.history.History at 0x7a1736f78450>

### Accuracy and loss

In [15]:
model.summary()

In [16]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 27ms/step - accuracy: 0.8666 - loss: 0.3644
Test Loss: 0.3659696578979492, Test Accuracy: 0.8683000206947327


### Testing with new record

In [17]:
txt = 'The movie is so boring and the direction is bad'

txt = t.texts_to_sequences([txt])

txtP = pad_sequences(txt, maxlen=max_length, padding='post')

pred = model.predict(txtP)

print(pred)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 456ms/step
[[0.00756652]]


In [18]:
print("Sentiment : Positive" if pred[0][0] > 0.5 else "Sentiment : Negative")

Sentiment : Negative
