In [None]:
import tensorflow as tf
# from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

# Hyperparameters shared by the data-preparation and model cells below.

# Data / input representation
vocab_size = 10000   # word indices kept by load_data; also the Embedding input_dim
max_length = 200     # number of values per sample after padding/truncation
embedding_dim = 32   # size of each embedding vector

# Network architecture
num_filters = 128    # filters in every Conv1D layer
kernel_size = 3      # window width of every Conv1D layer
hidden_dims = 64     # units in each hidden Dense layer

# Training loop
batch_size = 128     # samples per gradient step
epochs = 10          # passes over the training set




In [None]:
# Fetch the IMDB reviews through the Keras dataset helper; `num_words` is
# set to `vocab_size` so the word-index range matches the rest of the notebook.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.imdb.load_data(
    num_words=vocab_size,
)



In [None]:
def sequences_to_matrix(sequences, mode='binary', num_words=None):
    """Encode integer sequences as a dense (n_sequences, num_words) matrix.

    Args:
        sequences: iterable of sequences of non-negative integer word indices.
            Every index must be smaller than ``num_words``.
        mode: how each row is filled:
            * ``'binary'`` - 1 where the word occurs in the sequence, else 0.
            * ``'count'``  - number of occurrences of the word in the sequence.
            * ``'tfidf'``  - ``log(1 + count) * idf`` with the smoothed
              ``idf = log((1 + n_sequences) / (1 + df)) + 1``, where ``df`` is
              the number of sequences that contain the word.
        num_words: number of columns of the result. When ``None`` the
            module-level ``vocab_size`` is used.

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(len(sequences), num_words)``.

    Raises:
        ValueError: if ``mode`` is not one of the supported values.
        IndexError: if a sequence contains an index >= ``num_words``.
    """
    if mode not in ('binary', 'count', 'tfidf'):
        raise ValueError(
            "Unknown mode {!r}; expected 'binary', 'count' or 'tfidf'".format(mode)
        )
    if num_words is None:
        num_words = vocab_size

    matrix = np.zeros((len(sequences), num_words))

    # Collapse every sequence to (distinct words, occurrences) once. A plain
    # fancy-indexed `+= 1` does not accumulate repeated indices, so the
    # occurrences have to be counted explicitly.
    per_sequence = []
    for seq in sequences:
        indices = np.asarray(seq, dtype=np.intp)
        per_sequence.append(np.unique(indices, return_counts=True))

    if mode == 'binary':
        for i, (words, _) in enumerate(per_sequence):
            matrix[i, words] = 1
    elif mode == 'count':
        for i, (words, counts) in enumerate(per_sequence):
            matrix[i, words] = counts
    else:  # 'tfidf'
        # Document frequency: each word counts once per sequence containing it.
        df = np.zeros(num_words)
        for words, _ in per_sequence:
            df[words] += 1
        # Smoothed inverse document frequency.
        idf = np.log((1 + len(sequences)) / (1 + df)) + 1
        # Log-scaled term frequency weighted by the idf of the same word.
        for i, (words, counts) in enumerate(per_sequence):
            matrix[i, words] = np.log(1 + counts) * idf[words]

    return matrix

In [None]:
def pad_sequences(sequences, maxlen=None, padding='pre', truncating='pre'):
    """Pad or truncate every sequence to a common length.

    Args:
        sequences: list (or array) of sequences of numbers.
        maxlen: target length. When ``None`` the length of the longest
            sequence is used (0 if ``sequences`` is empty).
        padding: ``'pre'`` puts the zeros before the values, ``'post'`` puts
            them after the values.
        truncating: for sequences longer than ``maxlen``, ``'pre'`` drops
            values from the beginning (keeps the last ``maxlen``) and
            ``'post'`` drops values from the end (keeps the first ``maxlen``).

    Returns:
        A float64 ``numpy.ndarray`` of shape ``(len(sequences), maxlen)``
        filled with zeros wherever a sequence is shorter than ``maxlen``.

    Raises:
        ValueError: if ``padding`` or ``truncating`` is not ``'pre'``/``'post'``.
    """
    if padding not in ('pre', 'post'):
        raise ValueError(
            "Unknown padding {!r}; expected 'pre' or 'post'".format(padding)
        )
    if truncating not in ('pre', 'post'):
        raise ValueError(
            "Unknown truncating {!r}; expected 'pre' or 'post'".format(truncating)
        )

    if maxlen is None:
        maxlen = max((len(seq) for seq in sequences), default=0)

    matrix = np.zeros((len(sequences), maxlen))

    for i, seq in enumerate(sequences):
        # Nothing to copy. Skipping also avoids the `-0:` slice, which would
        # select the whole sequence / row instead of nothing.
        if maxlen == 0 or len(seq) == 0:
            continue

        # Truncate first so the kept part honours `truncating` regardless of
        # which side the padding goes on.
        kept = seq[-maxlen:] if truncating == 'pre' else seq[:maxlen]

        if padding == 'pre':
            matrix[i, maxlen - len(kept):] = kept
        else:
            matrix[i, :len(kept)] = kept

    return matrix

In [None]:
# Multi-hot (bag-of-words) encoding of the reviews: one row per review, one
# column per vocabulary entry.
# The result is stored under separate names on purpose. The Embedding/Conv1D
# model further down consumes integer word-index sequences, so x_train and
# x_test must stay as loaded; overwriting them with 0/1 matrices would make
# the padding step slice vocabulary columns instead of words. Keeping the
# inputs intact also makes this cell safe to re-run.
x_train_bow = sequences_to_matrix(x_train, mode="binary")
x_test_bow = sequences_to_matrix(x_test, mode="binary")



In [None]:
# Inspect the length of the first training sample.
len(x_train[0])

10000

In [None]:
# Bring every sample to exactly `max_length` values: shorter samples get
# zeros appended, longer ones keep only their trailing `max_length` values.
x_train = pad_sequences(
    x_train, maxlen=max_length, padding="post", truncating="pre"
)
x_test = pad_sequences(
    x_test, maxlen=max_length, padding="post", truncating="pre"
)


In [None]:
# Model definition: embedding -> three Conv1D/MaxPooling1D stages -> two
# hidden dense layers -> single sigmoid output unit.
model = tf.keras.Sequential(
    [
        # Embedding layer
        tf.keras.layers.Embedding(
            input_dim=vocab_size,
            output_dim=embedding_dim,
            input_length=max_length,
        ),
        # Convolution + pooling, stage 1
        tf.keras.layers.Conv1D(
            filters=num_filters, kernel_size=kernel_size, activation="relu"
        ),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        # Convolution + pooling, stage 2
        tf.keras.layers.Conv1D(
            filters=num_filters, kernel_size=kernel_size, activation="relu"
        ),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        # Convolution + pooling, stage 3
        tf.keras.layers.Conv1D(
            filters=num_filters, kernel_size=kernel_size, activation="relu"
        ),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        # Classifier head
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(units=hidden_dims, activation="relu"),
        tf.keras.layers.Dense(units=hidden_dims, activation="relu"),
        tf.keras.layers.Dense(units=1, activation="sigmoid"),
    ]
)

# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Fit on the training split.
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
)

# Report loss and accuracy on the held-out test split.
score = model.evaluate(x_test, y_test)
print("Test loss: {:.3f}".format(score[0]))
print("Test accuracy: {:.3f}".format(score[1]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 0.603
Test accuracy: 0.688
