<h1>Loading and preprocessing data</h1>

In [1]:
import tensorflow as tf
from tensorflow import keras
from keras.layers import TextVectorization
import pandas as pd

In [2]:
# Turn on memory growth for every visible GPU.
# Iterating over the device list (instead of indexing gpu[0]) keeps this cell
# from raising IndexError on CPU-only machines, where the list is empty, and
# applies the same setting to all GPUs when more than one is present.
gpu = tf.config.list_physical_devices('GPU')
for device in gpu:
    tf.config.experimental.set_memory_growth(device, True)

In [3]:
# Read the IMDB reviews CSV from the working directory into a DataFrame.
sheet = pd.read_csv(filepath_or_buffer='IMDB Dataset.csv')

In [4]:
# Show the loaded DataFrame as this cell's output (bare last expression).
sheet

Unnamed: 0,Review,Sentiment
0,One of the other reviewers has mentioned that ...,1
1,A wonderful little production. <br /><br />The...,1
2,I thought this was a wonderful way to spend ti...,1
3,Basically there's a family where a little boy ...,0
4,"Petter Mattei's ""Love in the Time of Money"" is...",1
...,...,...
49995,I thought this movie did a down right good job...,1
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",0
49997,I am a Catholic taught in parochial elementary...,0
49998,I'm going to have to disagree with the previou...,0


In [5]:
# Separate the review text (model input) from the 0/1 sentiment label (target).
x, y = sheet['Review'], sheet['Sentiment']

In [6]:
# Vectorizer settings: vocabulary size cap and fixed token-sequence length.
MAX_FEATURES, OUTPUT_LENGTH = 500_000, 1_500

In [7]:
# Layer that maps raw text to integer token ids, capped at MAX_FEATURES
# vocabulary entries and producing sequences of OUTPUT_LENGTH ids.
vectorizer = TextVectorization(
    output_mode='int',
    max_tokens=MAX_FEATURES,
    output_sequence_length=OUTPUT_LENGTH,
)

In [8]:
# Build the vectorizer's vocabulary from the review texts.
vectorizer.adapt(data=x)

In [9]:
# Run every review through the adapted vectorizer to obtain its integer token ids.
vectorized_text = vectorizer(x)

In [10]:
# Input pipeline: (token ids, label) pairs -> cached -> shuffled once -> batches of 32.
data = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
data = data.cache()
# Keep one fixed shuffle order. With the default reshuffle_each_iteration=True
# the dataset is re-shuffled on every pass, so any take()/skip() split derived
# from it would select different samples each epoch and held-out samples
# would leak into training.
data = data.shuffle(50_001, reshuffle_each_iteration=False)
data = data.batch(32)
data = data.prefetch(16)

In [11]:
# Carve the batched dataset into consecutive 70% / 20% / 10% slices.
# All sizes are counted in batches and truncated to whole numbers.
DATA_SIZE = len(data)
train_batches = int(DATA_SIZE*0.7)
validation_batches = int(DATA_SIZE*0.2)
test_batches = int(DATA_SIZE*0.1)

training = data.take(train_batches)
validation = data.skip(train_batches).take(validation_batches)
# Skipping both earlier slices at once is equivalent to two chained skips.
testing = data.skip(train_batches + validation_batches).take(test_batches)

<h1>Creating and running the model</h1>

In [12]:
from keras.models import Sequential
from keras.layers import Embedding, Bidirectional, LSTM, Dense, Input

In [13]:
# Binary sentiment classifier:
# embedding -> bidirectional LSTM -> three dense ReLU layers -> sigmoid output.
model = Sequential([
    Embedding(MAX_FEATURES + 1, 32),
    Bidirectional(LSTM(32, activation='tanh')),
    Dense(256, activation='relu'),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [14]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric.
model.compile(
    loss='BinaryCrossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [15]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 32)          16000032  
                                                                 
 bidirectional (Bidirectiona  (None, 64)               16640     
 l)                                                              
                                                                 
 dense (Dense)               (None, 256)               16640     
                                                                 
 dense_1 (Dense)             (None, 512)               131584    
                                                                 
 dense_2 (Dense)             (None, 256)               131328    
                                                                 
 dense_3 (Dense)             (None, 1)                 257       
                                                        

In [28]:
# Train for 6 epochs on the training split, evaluating on the validation split.
history = model.fit(
    x=training,
    validation_data=validation,
    epochs=6,
)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [None]:
# Score the trained model on the held-out testing split.
model.evaluate(x=testing)

  6/156 [>.............................] - ETA: 40s - loss: 0.6930 - accuracy: 0.5156

In [38]:
# Write the classifier to 'model.h5' in the working directory.
model.save(filepath='model.h5')

In [16]:
# Wrap the adapted vectorizer in its own model so raw review strings can be
# converted to token ids separately from the classifier.
v_model = Sequential()
# TextVectorization consumes strings, while Input defaults to a float dtype;
# declare the string dtype explicitly so the wrapper model accepts text.
v_model.add(Input(shape=(1,), dtype=tf.string))
v_model.add(vectorizer)

In [17]:
# Export the vectorizer-only model to 'vector_model.tf'.
v_model.save(filepath='vector_model.tf')

INFO:tensorflow:Assets written to: vector_model.tf\assets
