###RNN model with LSTM

In [1]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


###Load Your Dataset

In [2]:
# File path
file_path = '/content/drive/MyDrive/IMBD/Vector_dataset.csv'

In [3]:
# Read CSV file
import pandas as pd
df = pd.read_csv(file_path)

In [4]:
# Display the first 5 rows of data
df.head()

Unnamed: 0,Review,Sentiment,Sarcasm,Lemmatized_Review,Tokenized_Review,Sentiment_Label,Sarcasm_Label,word2vec_vector
0,One reviewers mentioned watching 1 Oz episode ...,positive,non-sarcastic,one reviewer mention watch 1 oz episode hook ....,"['one', 'reviewer', 'mention', 'watch', '1', '...",2,0,[-0.33703893 0.63750656 0.20848949 0.110051...
1,wonderful little production. filming technique...,positive,non-sarcastic,wonderful little production . film technique u...,"['wonderful', 'little', 'production', '.', 'fi...",2,0,[-2.21933369e-01 6.40139948e-01 2.48385639e-...
2,movie groundbreaking experience! I've never se...,positive,sarcastic,movie groundbreaking experience ! I have never...,"['movie', 'groundbreaking', 'experience', '!',...",2,1,[-7.50784083e-01 8.69618461e-01 6.57767776e-...
3,thought wonderful way spend time hot summer we...,positive,non-sarcastic,think wonderful way spend time hot summer week...,"['think', 'wonderful', 'way', 'spend', 'time',...",2,0,[-0.29578843 0.66404176 0.19095987 0.130039...
4,Basically there's family little boy (Jake) thi...,negative,sarcastic,basically there be family little boy ( Jake ) ...,"['basically', 'there', 'be', 'family', 'little...",0,1,[-0.36713844 0.69574437 0.21454412 0.073285...


Necessary libraries installed

In [5]:
pip install tensorflow pandas numpy scikit-learn




###Prepare the Features

In [17]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, SpatialDropout1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping

# Prepare the features (X) and labels (y)
X = df['Tokenized_Review'].values
y = df['Sarcasm_Label'].values

# Tokenize the text
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)

###Pad the Sequences

In [18]:
# Pad the sequences
max_length = max(len(seq) for seq in X)
X = pad_sequences(X, maxlen=max_length, padding='post')

###Split the Data into Training and Testing Sets

In [19]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

###Build the Model

In [26]:
# Build the RNN model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128, input_length=max_length))
model.add(SpatialDropout1D(0.2))
model.add(LSTM(128, return_sequences=True))
model.add(LSTM(64))
model.add(Dense(1, activation='sigmoid'))

###Compile the Model

In [27]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

###Train the Model

In [28]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=64, validation_data=(X_test, y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7a47eb479ff0>

###Evaluate the Model

In [29]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 53.54%


###Save the Trained Model

In [33]:
# Save the trained model
model.save('LSTM_model.h5')

In [36]:
import pickle
# Save the tokenizer
with open('tokenizer.pkl', 'wb') as file:
    pickle.dump(tokenizer, file)

# Extract and save the embeddings
for layer in model.layers:
    if isinstance(layer, Embedding):
        embedding_layer = layer
        break

embedding_weights = embedding_layer.get_weights()[0]

# Save the embedding weights
np.save('embedding_weights.npy', embedding_weights)

print("Model, tokenizer, and embeddings saved successfully.")

Model, tokenizer, and embeddings saved successfully.


###Predict the Movie review

In [38]:
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle

# Load the trained model
model = load_model('LSTM_model.h5')

# Load the tokenizer
with open('tokenizer.pkl', 'rb') as file:
    tokenizer = pickle.load(file)

# Load the embeddings
embedding_weights = np.load('embedding_weights.npy')

# Example usage
new_reviews = ["It's Comedy horror film!"]
new_reviews_seq = tokenizer.texts_to_sequences(new_reviews)
new_reviews_padded = pad_sequences(new_reviews_seq, maxlen=model.input_shape[1], padding='post')

# Predict sarcasm
predictions = model.predict(new_reviews_padded)
print(predictions)


[[0.5426161]]
