<h1>Loading and preprocessing data</h1>

In [36]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import TextVectorization
import pandas as pd

In [37]:
gpu = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpu[0], True)

In [38]:
sheet = pd.read_csv('IMDB Dataset.csv')

In [39]:
sheet

Unnamed: 0,Review,Sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1
...,...,...
49995,I thought this movie did a down right good job...,1
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",0
49997,I am a Catholic taught in parochial elementary...,0
49998,I'm going to have to disagree with the previou...,0


In [40]:
x = sheet['Review']
y = sheet['Sentiment']

In [41]:
MAX_FEATURES = 500_000
OUTPUT_LENGTH = 1_500

In [42]:
vectorizer = TextVectorization(max_tokens = MAX_FEATURES,
                               output_sequence_length = OUTPUT_LENGTH,
                               output_mode = 'int')

In [43]:
vectorizer.adapt(x)

In [44]:
vectorized_text = vectorizer(x)

In [45]:
data = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
data = data.cache()
data = data.shuffle(50_001)
data = data.batch(32)
data = data.prefetch(16)

In [46]:
DATA_SIZE = len(data)
training = data.take(int(DATA_SIZE*0.7))
validation = data.skip(int(DATA_SIZE*0.7)).take(int(DATA_SIZE*0.2))
testing = data.skip(int(DATA_SIZE*0.7)).skip(int(DATA_SIZE*0.2)).take(int(DATA_SIZE*0.1))

<h1>Creating and running the model</h1>

In [47]:
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dense, Input

In [48]:
model = Sequential()
model.add(Embedding(MAX_FEATURES + 1, 32))
model.add(Bidirectional(LSTM(32, activation = 'tanh')))

model.add(Dense(256, activation = 'relu'))
model.add(Dense(512, activation = 'relu'))
model.add(Dense(256, activation = 'relu'))

model.add(Dense(1, activation = 'sigmoid'))

In [49]:
model.compile(optimizer = 'Adam', loss = 'BinaryCrossentropy', metrics = ['accuracy'])

In [50]:
for i, layer in enumerate(model.layers):
    layer._name = 'layer_' + str(i+1)
    
model._name = 'Movie_Reviews_Analysis'
    
model.summary()

Model: "Movie_Reviews_Analysis"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 layer_1 (Embedding)         (None, None, 32)          16000032  
                                                                 
 layer_2 (Bidirectional)     (None, 64)                16640     
                                                                 
 layer_3 (Dense)             (None, 256)               16640     
                                                                 
 layer_4 (Dense)             (None, 512)               131584    
                                                                 
 layer_5 (Dense)             (None, 256)               131328    
                                                                 
 layer_6 (Dense)             (None, 1)                 257       
                                                                 
Total params: 16,296,481
Trainable params: 1

In [52]:
history = model.fit(training, epochs = 6, validation_data = validation)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [53]:
model.evaluate(testing)



[0.043959058821201324, 0.9857772588729858]

In [54]:
model.save('model.h5')

In [55]:
vectorizer_model = Sequential()
vectorizer_model.add(Input(shape=(1,)))
vectorizer_model.add(vectorizer)

In [56]:
vectorizer_model.save('vector_model.tf')

INFO:tensorflow:Assets written to: vector_model.tf\assets


In [6]:
from matplotlib import pyplot as plt

In [None]:
x = range(1,7)
plt.plot(x, history.history['loss'], label = 'Training loss')
plt.plot(x, history.history['val_loss'], label = 'Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

In [None]:
x = range(1,7)
plt.plot(x, history.history['accuracy'], label = 'Training accuracy')
plt.plot(x, history.history['val_accuracy'], label = 'Validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()