load modules and data

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# Hyper-parameters shared by later cells: vocabulary size cap and the fixed
# number of token ids per review.
MAX_FEATURES = 10000
SEQUENCE_LENGTH = 250

# Load the comma-separated dataset into a DataFrame.
path = "../Data/IMDB-Dataset.csv"
data = pd.read_csv(path, sep=',')

# Sanity check: first rows, then the (rows, columns) shape.
print(data.head(), data.shape, sep="\n")

In [None]:
# Replace HTML line breaks in the review text with a space. regex=False makes
# the literal match explicit, independent of the pandas version's default.
data['review'] = data['review'].str.replace("<br />", " ", regex=False)

# Encode the sentiment string as an integer label:
# 1 where 'sentiment' equals 'positive', 0 for everything else.
# The Series keeps the name 'rating'.
col = (data['sentiment'] == 'positive').astype('int64').rename('rating')

# Remove the 'sentiment' column so `data` holds only the model inputs;
# the labels now live in `col`.
data.drop(columns=['sentiment'], inplace=True)

print(data)
print(col)

In [None]:
# Split features (`data`) and labels (`col`) together into train and test
# parts, holding out 20% of the rows for testing.
(train_data, test_data,
 train_col, test_col) = train_test_split(data, col, test_size=0.2)

In [None]:
# Text -> integer token ids. The vocabulary is capped at MAX_FEATURES entries
# and every output row has exactly SEQUENCE_LENGTH ids.
transform = tf.keras.layers.TextVectorization(
    max_tokens=MAX_FEATURES,
    output_sequence_length=SEQUENCE_LENGTH,
)

# Build the vocabulary from the training split only.
transform.adapt(train_data)

train_data_transformed = transform(train_data)
test_data_transformed = transform(test_data)

# Backward-compatible alias: other cells refer to the original, misspelled
# name, so keep it bound to the same tensor.
tain_data_transformed = train_data_transformed

print(train_data_transformed)
# Tip: transform.get_vocabulary()[i] shows the token stored at index i.

In [None]:
# Classifier stack, added layer by layer:
# token ids -> 16-dimensional embeddings -> average over the sequence axis
# -> dropout (rate 0.2) -> 2-unit softmax output.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=MAX_FEATURES, output_dim=16))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dropout(rate=0.2))
model.add(tf.keras.layers.Dense(units=2, activation=tf.nn.softmax))

In [None]:
# Integer class labels (0/1) with a 2-unit softmax output, hence the sparse
# categorical cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Stop once the validation loss has not improved for 3 consecutive epochs.
# restore_best_weights=True rolls the model back to the best epoch; without
# it the model would keep the weights of the last (worse) epoch.
cb_early = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# epochs=100 is only an upper bound; early stopping normally ends training sooner.
# NOTE(review): the test split doubles as the validation set here, so the
# reported validation metrics are not an unbiased estimate - consider carving
# a separate validation split out of the training data.
model.fit(
    tain_data_transformed,
    train_col,
    validation_data=(test_data_transformed, test_col),
    epochs=100,
    callbacks=[cb_early],
)

In [None]:
# A few hand-written reviews to try the trained model on.
examples = [
    "The movie was great",
    "It was boring",
    "Don't waste your time",
]

# Apply the same vectorization that was used for the training data.
txt = transform(examples)

# Pass the tensor directly: the model has a single input, so wrapping it in a
# list is unnecessary and does not match the model's expected input structure.
pred = model.predict(txt)

# One row per example with two softmax probabilities:
# column 0 -> label 0, column 1 -> label 1 ('positive').
print(pred)