In [1]:
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.initializers import Constant

In [2]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling are repeatable after Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Vocabulary size requested from the IMDB loader; also the number of rows in
# the embedding matrix built later in the notebook.
top_words = 10000
# Every review is padded/truncated to this many tokens before training.
max_review_length = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=top_words)

In [3]:
# Bring both splits to a fixed length of max_review_length tokens.
# 'pre' is pad_sequences' default for both options and is spelled out here:
# shorter reviews get zeros in front, longer ones lose their leading tokens.
x_train, x_test = [
    pad_sequences(
        reviews,
        maxlen=max_review_length,
        padding='pre',
        truncating='pre',
    )
    for reviews in (x_train, x_test)
]

In [4]:
def load_glove_embeddings(path):
    """Parse a GloVe-style text file into a ``{token: vector}`` dictionary.

    Every non-blank line is split on whitespace: the first field is the
    token, the remaining fields are its vector components.

    Args:
        path: Location of the embeddings text file (read as UTF-8).

    Returns:
        dict mapping each token (str) to a 1-D ``float32`` numpy array.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if a vector component cannot be converted to float.
    """
    embeddings_index = {}
    with open(path, encoding="utf8") as f:
        for line in f:
            values = line.split()
            # A blank line (e.g. a trailing empty line at the end of the
            # file) has no token; skip it instead of failing on values[0].
            if not values:
                continue
            word = values[0]
            embeddings_index[word] = np.asarray(values[1:], dtype='float32')
    return embeddings_index

In [5]:
# Location of the GloVe vectors. Set the GLOVE_PATH environment variable to
# point at your own copy; when it is unset the original location is used.
glove_path = os.environ.get(
    'GLOVE_PATH',
    '/Users/sivaprasanth/Documents/DL/Ex6/glove.6B.100d.txt',
)
embeddings_index = load_glove_embeddings(glove_path)

# Width of each embedding row; must equal the length of the loaded vectors.
embedding_dim = 100
# One row per vocabulary id. Rows that never receive a vector stay all-zero.
embedding_matrix = np.zeros((top_words, embedding_dim))

In [6]:
# imdb.load_data() was called with its default index_from=3: ids 0, 1 and 2
# are reserved (padding, start-of-sequence, out-of-vocabulary) and every real
# word is stored in the reviews as get_word_index()[word] + 3. The row written
# here must use the same shifted id, otherwise each token would look up the
# vector of a different word.
index_from = 3
for word, i in imdb.get_word_index().items():
    token_id = i + index_from
    # The loader keeps only ids below num_words (== top_words), so the bound
    # is checked on the shifted id.
    if token_id < top_words:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[token_id] = embedding_vector

In [7]:
# Classifier: fixed pre-computed embeddings -> LSTM -> single sigmoid unit.
model = Sequential()
model.add(
    Embedding(
        top_words,
        embedding_dim,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=max_review_length,
        trainable=False,  # keep the embedding weights frozen during training
    )
)
model.add(LSTM(128))
model.add(Dense(1, activation='sigmoid'))

In [8]:
# Binary target, so binary cross-entropy with accuracy as the reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training hyperparameters gathered in one place; validation_split=0.2 holds
# out a fifth of the training data for validation.
fit_options = {'batch_size': 32, 'epochs': 3, 'validation_split': 0.2}
model.fit(x_train, y_train, **fit_options)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x2c71575e0>

In [9]:
# evaluate() returns the loss followed by the compiled metrics; position 1 is
# the accuracy metric requested in compile().
test_metrics = model.evaluate(x_test, y_test)
accuracy = test_metrics[1]
print(f'Test Accuracy: {accuracy:.2f}')

