<a href="https://colab.research.google.com/github/juagarmar/NLP/blob/main/IMDB_Sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.datasets import imdb

In [2]:
# Set the number of words to keep
vocab_size = 10000

# Load the IMDB dataset
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

In [7]:
# Padding sequences to ensure uniform input size
max_length = 256
train_data = pad_sequences(train_data, maxlen=max_length, padding='post')
test_data = pad_sequences(test_data, maxlen=max_length, padding='post')

In [16]:
# Build the model
model = Sequential([
    Embedding(vocab_size, 16, input_length=max_length),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    #Dense(8, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Model summary
model.summary()


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 256, 16)           160000    
                                                                 
 global_average_pooling1d_2  (None, 16)                0         
  (GlobalAveragePooling1D)                                       
                                                                 
 dense_4 (Dense)             (None, 16)                272       
                                                                 
 dense_5 (Dense)             (None, 8)                 136       
                                                                 
 dense_6 (Dense)             (None, 1)                 9         
                                                                 
Total params: 160417 (626.63 KB)
Trainable params: 160417 (626.63 KB)
Non-trainable params: 0 (0.00 Byte)
______________

In [17]:
# Train the model
epochs = 10
history = model.fit(train_data, train_labels,
                    epochs=epochs,
                    validation_data=(test_data, test_labels),
                    verbose=2)


Epoch 1/10
782/782 - 8s - loss: 0.4753 - accuracy: 0.7676 - val_loss: 0.2977 - val_accuracy: 0.8785 - 8s/epoch - 11ms/step
Epoch 2/10
782/782 - 5s - loss: 0.2345 - accuracy: 0.9087 - val_loss: 0.2859 - val_accuracy: 0.8842 - 5s/epoch - 6ms/step
Epoch 3/10
782/782 - 6s - loss: 0.1841 - accuracy: 0.9315 - val_loss: 0.2935 - val_accuracy: 0.8815 - 6s/epoch - 7ms/step
Epoch 4/10
782/782 - 5s - loss: 0.1524 - accuracy: 0.9456 - val_loss: 0.3206 - val_accuracy: 0.8766 - 5s/epoch - 6ms/step
Epoch 5/10
782/782 - 4s - loss: 0.1269 - accuracy: 0.9567 - val_loss: 0.3621 - val_accuracy: 0.8669 - 4s/epoch - 6ms/step
Epoch 6/10
782/782 - 6s - loss: 0.1075 - accuracy: 0.9650 - val_loss: 0.3928 - val_accuracy: 0.8648 - 6s/epoch - 8ms/step
Epoch 7/10
782/782 - 4s - loss: 0.0955 - accuracy: 0.9692 - val_loss: 0.4311 - val_accuracy: 0.8590 - 4s/epoch - 6ms/step
Epoch 8/10
782/782 - 6s - loss: 0.0818 - accuracy: 0.9738 - val_loss: 0.4755 - val_accuracy: 0.8557 - 6s/epoch - 7ms/step
Epoch 9/10
782/782 - 5s

In [18]:
# Evaluate the model
loss, accuracy = model.evaluate(test_data, test_labels)
print("Loss: ", loss)
print("Accuracy: ", accuracy)


Loss:  0.548712968826294
Accuracy:  0.8499600291252136
