<a href="https://colab.research.google.com/github/mahima-c/deep-learning/blob/main/Sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Sentiment analysis**


In [3]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, preprocessing
import tensorflow_datasets as tfds

# Hyperparameters shared by the data-loading, model-building and training code.
# Length every review is padded to (pad_sequences `maxlen`, Embedding `input_length`).
max_len = 200
# Vocabulary size: `num_words` for the IMDB loader and first argument of the Embedding layer.
n_words = 10000
# Size of each embedding vector (second argument of the Embedding layer).
dim_embedding = 256
# Number of epochs passed to model.fit.
EPOCHS = 20
# Batch size passed to both model.fit and model.evaluate.
BATCH_SIZE = 500

def load_data():
    """Fetch the IMDB review dataset and pad every review to a fixed length.

    The vocabulary is capped with ``num_words=n_words`` and the sequences of
    each split are passed through ``pad_sequences(..., maxlen=max_len)``.
    Labels are returned exactly as delivered by the loader.

    Returns:
        ``((X_train, y_train), (X_test, y_test))`` where the ``X_*`` entries
        are the padded sequences.
    """
    train_split, test_split = datasets.imdb.load_data(num_words=n_words)

    def _pad(split):
        # Pad only the sequences; the labels ride along unchanged.
        sequences, labels = split
        return preprocessing.sequence.pad_sequences(sequences, maxlen=max_len), labels

    return _pad(train_split), _pad(test_split)

def build_model():
    """Assemble the (uncompiled) sentiment classifier.

    Layer stack, in order:
      1. Embedding: turns each integer token id into a ``dim_embedding``-sized
         vector. Input is an integer matrix of shape (batch, max_len); output
         is (batch, max_len, dim_embedding). Token ids are expected to stay
         below ``n_words``, the vocabulary size given to the layer.
      2. Dropout(0.3) applied to the embedded sequence.
      3. GlobalMaxPooling1D: for every embedding feature, keeps its maximum
         over the max_len positions, giving (batch, dim_embedding).
      4. Dense(128, relu) followed by Dropout(0.5).
      5. Dense(1, sigmoid): one output value per review.

    Returns:
        A ``Sequential`` model; compiling it is left to the caller.
    """
    stack = [
        layers.Embedding(n_words, dim_embedding, input_length=max_len),
        layers.Dropout(0.3),
        layers.GlobalMaxPooling1D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ]
    # Sequential adds the layers one by one in list order.
    return models.Sequential(stack)


In [4]:
# Load the padded IMDB splits, then build and compile the classifier.
(X_train, y_train), (X_test, y_test) = load_data()
model = build_model()
model.summary()
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Keep the History object returned by fit() under its own name so the
# per-epoch metrics are not overwritten by the evaluation result below.
history = model.fit(
    X_train, y_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    # NOTE(review): the test split doubles as validation data here, so the
    # final test metrics are not measured on data unseen during monitoring.
    validation_data=(X_test, y_test),
)

# With the compile() call above, evaluate() returns the loss first and the
# accuracy metric second.
score = model.evaluate(X_test, y_test, batch_size=BATCH_SIZE)
print("\nTest score:", score[0])
print('Test accuracy:', score[1])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 200, 256)          2560000   
_________________________________________________________________
dropout (Dropout)            (None, 200, 256)          0         
_________________________________________________________________
global_max_pooling1d (Global (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 129       
Total params: 2,593,025
Trainable params: 2,593,025
Non-trainable params: 0
______________________________________________

**Predicting output**


In [8]:
# Making predictions.

# NOTE(review): `X` is not defined in any of the cells visible above; a padded
# batch laid out like X_test is presumably intended — confirm before enabling.
# predictions = model.predict(X)

array([1987,    2,   45,   55,  221,   15,  670, 5304,  526,   14, 1069,
          4,  405,    5, 2438,    7,   27,   85,  108,  131,    4, 5045,
       5304, 3884,  405,    9, 3523,  133,    5,   50,   13,  104,   51,
         66,  166,   14,   22,  157,    9,    4,  530,  239,   34, 8463,
       2801,   45,  407,   31,    7,   41, 3778,  105,   21,   59,  299,
         12,   38,  950,    5, 4521,   15,   45,  629,  488, 2733,  127,
          6,   52,  292,   17,    4, 6936,  185,  132, 1988, 5304, 1799,
        488, 2693,   47,    6,  392,  173,    4,    2, 4378,  270, 2352,
          4, 1500,    7,    4,   65,   55,   73,   11,  346,   14,   20,
          9,    6,  976, 2078,    7, 5293,  861,    2,    5, 4182,   30,
       3127,    2,   56,    4,  841,    5,  990,  692,    8,    4, 1669,
        398,  229,   10,   10,   13, 2822,  670, 5304,   14,    9,   31,
          7,   27,  111,  108,   15, 2033,   19, 7836, 1429,  875,  551,
         14,   22,    9, 1193,   21,   45, 4829,   