<a href="https://colab.research.google.com/github/soumilhooda/MLDLNNtoCV/blob/main/SentimentAnalysis_IMDB.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing

In [2]:
# Hyperparameters shared by the data-loading, model-building and training cells.

# Length every review is padded/truncated to; also the Embedding input_length.
max_len = 200
# Passed as num_words to imdb.load_data and as the Embedding vocabulary size.
n_words = 1000
# Size of each embedding vector produced by the Embedding layer.
dim_embedding = 256
# Number of training epochs passed to model.fit.
EPOCHS = 20
# Batch size used by both model.fit and model.evaluate.
BATCH_SIZE = 500

In [3]:
def load_data():
    """Fetch the IMDB review dataset and bring every review to a fixed length.

    The loader is called with ``num_words=n_words`` and each integer sequence
    is then padded/truncated to ``max_len`` entries via ``pad_sequences``.

    Returns:
        Two ``(features, labels)`` tuples in the order
        ``(X_train, Y_train), (X_test, Y_test)``.
    """
    train_split, test_split = datasets.imdb.load_data(num_words=n_words)
    train_reviews, train_labels = train_split
    test_reviews, test_labels = test_split

    # Same padding routine for both splits so they share one input width.
    pad = preprocessing.sequence.pad_sequences
    return (
        (pad(train_reviews, maxlen=max_len), train_labels),
        (pad(test_reviews, maxlen=max_len), test_labels),
    )

In [4]:
def build_model():
    """Build the (uncompiled) IMDB sentiment classifier.

    Architecture: Embedding -> Dropout(0.3) -> GlobalMaxPooling1D
    -> Dense(128, relu) -> Dropout(0.5) -> Dense(1, sigmoid).

    Returns:
        A ``Sequential`` model that maps an integer matrix of shape
        ``(batch, max_len)`` to one value in (0, 1) per review.
    """
    model = models.Sequential()
    # Input -> embedding layer. The model takes an integer matrix of shape
    # (batch, max_len); the embedding output is (batch, max_len, dim_embedding).
    # Every integer index in the input must be smaller than n_words.
    model.add(layers.Embedding(n_words, dim_embedding, input_length=max_len))
    model.add(layers.Dropout(0.3))
    # For each of the dim_embedding features, keep its maximum value over the
    # max_len sequence positions -> output shape (batch, dim_embedding).
    model.add(layers.GlobalMaxPooling1D())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    # Sigmoid squashes the output into (0, 1), i.e. a probability, which is
    # what the binary_crossentropy loss used for training expects. A ReLU here
    # is unbounded above and can be exactly 0, giving a meaningless/clipped
    # loss and dead gradients.
    model.add(layers.Dense(1, activation='sigmoid'))

    return model

In [6]:
# Seed TensorFlow's global RNG before the model is built so weight
# initialisation and dropout start from the same state on a fresh kernel.
# (This improves repeatability; bit-exact results are not guaranteed on all hardware.)
SEED = 42
tf.random.set_seed(SEED)

(X_train, Y_train), (X_test, Y_test) = load_data()
model = build_model()
model.summary()

model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# fit() returns a History object holding the per-epoch metrics, not a score.
# Keep it under its own name so it is not clobbered by evaluate() below.
# NOTE(review): the test split is also used as validation data here, so the
# reported test metrics are not from fully held-out data if any tuning is
# done against the validation curve.
history = model.fit(X_train, Y_train, epochs=EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_test, Y_test))

# evaluate() returns [loss, accuracy], matching the loss/metrics given to compile().
score = model.evaluate(X_test, Y_test, batch_size=BATCH_SIZE)
print("\nTest Score:", score[0])
print('Test Accuracy:', score[1])

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 256)          256000    
                                                                 
 dropout_2 (Dropout)         (None, 200, 256)          0         
                                                                 
 global_max_pooling1d_1 (Glo  (None, 256)              0         
 balMaxPooling1D)                                                
                                                                 
 dense_2 (Dense)             (None, 128)               32896     
                                                                 
 dropout_3 (Dropout)         (None, 128)               0         
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                      