## Stage 1: Installing dependencies and setting up the GPU environment

In [None]:
!pip install numpy --upgrade

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting numpy
  Downloading numpy-1.24.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
[K     |████████████████████████████████| 17.3 MB 35.9 MB/s 
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.16.1
    Uninstalling numpy-1.16.1:
      Successfully uninstalled numpy-1.16.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
scipy 1.7.3 requires numpy<1.23.0,>=1.16.5, but you have numpy 1.24.0 which is incompatible.
numba 0.56.4 requires numpy<1.24,>=1.18, but you have numpy 1.24.0 which is incompatible.[0m
Successfully installed numpy-1.24.0


## Stage 2: Importing project dependencies

In [None]:
import numpy as np
import tensorflow as tf

from tensorflow.keras.datasets import imdb



In [None]:
# Display the installed TensorFlow version so the run can be reproduced later.
tf.__version__

'2.9.2'

## Stage 3: Dataset preprocessing

### Setting up dataset parameters

In [None]:
# Vocabulary cap: passed as `num_words` when the IMDB data is loaded and reused
# as the input dimension of the Embedding layer.
number_of_words = 20_000

# Every review is padded/truncated to this many tokens before training.
max_len = 100

### Loading the IMDB dataset

In [None]:
# Load the IMDB train/test splits. `num_words` limits the vocabulary to the
# `number_of_words` most frequent words (per the Keras dataset documentation).
# The first call downloads imdb.npz, as the output below shows.
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=number_of_words)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz


In [None]:
# Quick look at the training data: overall shape, then the first encoded review.
# Print the whole shape tuple rather than `shape[1]`: this cell sits before the
# padding step, where X_train is still a 1-D array of variable-length lists and
# has no second axis, so indexing it would raise IndexError on a fresh run.
print(X_train.shape)
print(X_train[0])

(25000, 100)
[ 1415    33     6    22    12   215    28    77    52     5    14   407
    16    82 10311     8     4   107   117  5952    15   256     4     2
     7  3766     5   723    36    71    43   530   476    26   400   317
    46     7     4 12118  1029    13   104    88     4   381    15   297
    98    32  2071    56    26   141     6   194  7486    18     4   226
    22    21   134   476    26   480     5   144    30  5535    18    51
    36    28   224    92    25   104     4   226    65    16    38  1334
    88    12    16   283     5    16  4472   113   103    32    15    16
  5345    19   178    32]


### Padding all sequences to be the same length 

In [None]:
# Force every training review to exactly `max_len` tokens so the data becomes a
# rectangular (num_reviews, max_len) array. Per the Keras documentation the
# defaults pad and truncate at the start of each sequence ('pre').
X_train = tf.keras.preprocessing.sequence.pad_sequences(X_train, maxlen=max_len)

In [None]:
# Apply the same `max_len` padding/truncation to the test reviews so they match
# the input length the model is built for.
X_test = tf.keras.preprocessing.sequence.pad_sequences(X_test, maxlen=max_len)

### Setting up Embedding Layer parameters

In [None]:
# The Embedding layer's input dimension is the same vocabulary cap that was
# used as `num_words` when loading the dataset.
vocab_size = number_of_words
vocab_size

20000

In [None]:
# Length of the dense vector the Embedding layer learns for each word index.
embed_size = 128

## Stage 4: Building a Recurrent Neural Network

### Defining the model

In [None]:
# Start from an empty Sequential model; the layers are appended one at a time
# in the cells below.
model = tf.keras.Sequential()

### Adding the Embedding Layer

In [None]:
# Map each word index (0 .. vocab_size - 1) to a trainable `embed_size`-dim vector.
# After padding, X_train has shape (num_reviews, max_len), so `X_train.shape[1]`
# is the per-review sequence length the layer should expect as input.
model.add(
    tf.keras.layers.Embedding(
        vocab_size,
        embed_size,
        input_shape=(X_train.shape[1],),
    )
)

### Adding the LSTM Layer

- units: 128
- activation: tanh

In [None]:
# Single LSTM layer with 128 units and tanh activation. The model summary below
# reports its output as (None, 128), i.e. one vector per review.
model.add(tf.keras.layers.LSTM(units=128, activation='tanh'))

### Adding the Dense output layer

- units: 1
- activation: sigmoid

In [None]:
# One sigmoid unit: a single score in (0, 1) per review, which pairs with the
# binary cross-entropy loss used at compile time.
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

### Compiling the model

In [None]:
# Configure training: RMSprop optimizer, binary cross-entropy for the single
# sigmoid output, and accuracy as the reported metric.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer output shapes and parameter counts as a sanity check
# before training.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 100, 128)          2560000   
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 2,691,713
Trainable params: 2,691,713
Non-trainable params: 0
_________________________________________________________________


### Training the model

In [None]:
# Train for 3 epochs in mini-batches of 128 reviews. No validation data is
# passed here, so the held-out test set is only touched in the evaluation step.
# The returned History object is not kept; its repr is the cell's output.
model.fit(X_train, y_train, epochs=3, batch_size=128)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f73f4dee0d0>

### Evaluating the model

In [None]:
# Evaluate on the padded test set; the two returned values are unpacked as the
# loss followed by the accuracy metric configured at compile time.
# NOTE(review): `test_acurracy` is misspelled ("accuracy"); the name is kept
# because the next cell reads it — rename both cells together.
test_loss, test_acurracy = model.evaluate(X_test, y_test)



In [None]:
# Report the accuracy measured on the held-out test set.
print(f"Test accuracy: {test_acurracy}")

Test accuracy: 0.8447999954223633
