##  IMDB Movie Review Classification without Regularization

In [None]:
import numpy as np
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Input, Dense
from keras.optimizers import RMSprop
import matplotlib.pyplot as plt

## Data Preparation

In [None]:
# Load the IMDB reviews, keeping only the 10000 most frequently occurring
# words in the training data.
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(
    num_words=10000,
    seed=19,
)

# Encoding the integer sequences into a binary matrix
def vectorize_sequences(sequences, dimension=10000):
    """Encode integer sequences as rows of a binary matrix.

    Args:
        sequences: Sized iterable of integer index sequences, one per
            output row.
        dimension: Number of columns in the output matrix.

    Returns:
        A NumPy array of shape ``(len(sequences), dimension)`` holding 1.0
        in every column whose index appears in the corresponding sequence
        and 0.0 everywhere else.
    """
    encoded = np.zeros(shape=(len(sequences), dimension))
    for row, word_indices in enumerate(sequences):
        # Index-array assignment sets all listed columns of this row at once;
        # a repeated index just sets the same cell again.
        encoded[row, word_indices] = 1.0
    return encoded

# Encode the integer review sequences of both splits as binary matrices.
x_train, x_test = (vectorize_sequences(split) for split in (X_train, X_test))

# Convert the labels of both splits to float32 arrays.
y_train, y_test = (
    np.asarray(labels).astype('float32') for labels in (Y_train, Y_test)
)

In [None]:
# Show the first training review before and after encoding, each followed
# by its length.
for first_review in (X_train[0], x_train[0]):
    print(first_review, len(first_review))

In [None]:
# Show the first training label as loaded, then after the float32 cast,
# one per line.
print(Y_train[0], y_train[0], sep="\n")

## Splitting into training and validation subsets

In [None]:
# Report the number of encoded training samples, then the array's full shape.
print(len(x_train), x_train.shape, sep="\n")

In [None]:
# Hold out the first 10000 samples for validation and train on the rest.
# NOTE: x_train / y_train are overwritten in place, so re-running this cell
# slices the already-reduced arrays again.
n_val = 10000
x_val, x_train = x_train[:n_val], x_train[n_val:]
y_val, y_train = y_train[:n_val], y_train[n_val:]
print(x_train.shape, x_val.shape)
print(y_train.shape, y_val.shape)

## Model Creation

In [None]:
# Fully connected binary classifier: a 10000-feature input, two 16-unit
# ReLU hidden layers, and a single sigmoid output unit.
model = Sequential(
    [
        Input(shape=(10000,)),
        Dense(16, activation="relu"),
        Dense(16, activation="relu"),
        Dense(1, activation="sigmoid"),
    ]
)

# summary() prints the table itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
model.summary()

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy loss, and
# accuracy as the reported metric.
model.compile(
    optimizer=RMSprop(learning_rate=0.001),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train the model, passing the held-out split as validation data.
# `epochs` stays a named variable because the plotting cell reads it.
epochs = 20
history = model.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=512,
    validation_data=(x_val, y_val),
)
          

## Visualization for overfitting

In [None]:
# Plot training vs. validation accuracy per epoch so the two curves can be
# compared for signs of overfitting.
hist = history.history
# Build the x-axis from the recorded history rather than the global `epochs`,
# so the plot cannot hit a length mismatch if `epochs` is edited or the
# cells are re-run out of order after training.
epoch_axis = range(1, len(hist["accuracy"]) + 1)
plt.plot(epoch_axis, hist["accuracy"], marker="*")
plt.plot(epoch_axis, hist["val_accuracy"], marker="^")
plt.legend(["Train", "validation"])
plt.xlabel("epoch")
plt.ylabel("Accuracy")
plt.show()


In [None]:
# Save the trained model to the relative path "IMDB.keras".
model.save("IMDB.keras")