## Submitted by
### Anurag Nagarkoti (239426), Wahab Haseeb Bhatti (239978), Suyash Gawandi (239716)

In [1]:
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import time

In [2]:
# Vocabulary size: passed to load_data as num_words below and used as the
# Embedding input dimension in create_model.
max_words = 20000
# NOTE(review): max_len is not passed to load_data and no visible cell
# truncates or pads sequences to it -- confirm whether it is still needed.
max_len = 200

# Load the Keras IMDB dataset as (sequences, labels) pairs for the train and test splits.
(train_sequences, train_labels), (test_sequences, test_labels) = tf.keras.datasets.imdb.load_data(num_words=max_words)


def preprocess(sequences, labels):
    """Return the sequences untouched, paired with the labels cast to int32.

    Args:
        sequences: Collection of token-index sequences; passed through as-is.
        labels: Array-like object exposing ``astype`` (e.g. a NumPy array).

    Returns:
        Tuple ``(sequences, labels_as_int32)``.
    """
    int_labels = labels.astype(np.int32)
    return sequences, int_labels

# Cast both label arrays to int32, matching the int32 label TensorSpec
# declared when the tf.data pipelines are built in preprocess_data.
train_sequences, train_labels = preprocess(train_sequences, train_labels)
test_sequences, test_labels = preprocess(test_sequences, test_labels)

In [3]:
def gen_train():
    """Yield ``(sequence, label)`` pairs from the training arrays, one at a time."""
    yield from zip(train_sequences, train_labels)

def gen_test():
    """Yield ``(sequence, label)`` pairs from the test arrays, one at a time."""
    yield from zip(test_sequences, test_labels)


In [4]:
def preprocess_data(is_bucketing):
    """Build the batched ``tf.data`` pipelines used for training and validation.

    Args:
        is_bucketing: If truthy, batch reviews of similar length together with
            ``bucket_by_sequence_length``; otherwise pad each batch of 100
            reviews to the length of its longest member.

    Returns:
        Tuple ``(train_data, test_data)`` of batched ``tf.data.Dataset``
        objects yielding ``(padded_sequences, labels)``.
    """
    # Variable-length int32 token sequence plus a scalar int32 label.
    signature = (
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )

    train_data = tf.data.Dataset.from_generator(gen_train, output_signature=signature)
    test_data = tf.data.Dataset.from_generator(gen_test, output_signature=signature)

    if is_bucketing:
        buckets = [50, 100, 150, 200, 250, 300, 350, 400, 450, 500]
        # One batch size per bucket; there is one more bucket than boundaries.
        bucket_batch_size = [200] * (len(buckets) + 1)

        def review_length(sequence, label):
            return tf.shape(sequence)[0]

        # Shuffle before bucketing so batches differ between epochs, as they
        # already do in the non-bucketing branch.
        train_data = train_data.shuffle(25000).bucket_by_sequence_length(
            review_length,
            bucket_boundaries=buckets,
            bucket_batch_sizes=bucket_batch_size,
            drop_remainder=True,
        )
        # Keep partial batches for evaluation: dropping them would silently
        # exclude test reviews from the validation metrics.
        test_data = test_data.bucket_by_sequence_length(
            review_length,
            bucket_boundaries=buckets,
            bucket_batch_sizes=bucket_batch_size,
            drop_remainder=False,
        )
    else:
        # NOTE: repeat(5) makes one Keras epoch cover five passes over the
        # training set, unlike the bucketing branch -- keep this in mind when
        # comparing training times between the two modes.
        train_data = train_data.shuffle(25000).padded_batch(100).repeat(5)
        # Evaluation needs neither shuffling nor repetition; a single ordered
        # pass gives the same metrics at a fifth of the cost.
        test_data = test_data.padded_batch(100)

    return train_data, test_data

In [15]:
def create_model(config):
    """Build and compile one of the recurrent classifiers used in this notebook.

    Args:
        config: Architecture name, one of ``'bilstm'``, ``'gru'`` or ``'lstm'``.

    Returns:
        A compiled ``tf.keras.Sequential`` model whose final ``Dense(1)`` layer
        emits one raw logit per input sequence.

    Raises:
        ValueError: If ``config`` is not one of the supported names.
    """
    valid_configs = ('bilstm', 'gru', 'lstm')
    # Validate up front: an unknown name used to fall through every branch and
    # only fail at ``return model`` with an UnboundLocalError.
    if config not in valid_configs:
        raise ValueError(
            f"Unknown config {config!r}; expected one of {valid_configs}")

    layers = tf.keras.layers

    # Every variant embeds up to ``max_words`` token ids (module-level setting)
    # and masks index 0 via mask_zero=True.
    if config == 'bilstm':
        stack = [
            layers.Embedding(max_words, 20, mask_zero=True),
            layers.Bidirectional(layers.LSTM(12, return_sequences=True)),
            layers.Bidirectional(layers.LSTM(5)),
            layers.Dense(1),
        ]
    elif config == 'gru':
        stack = [
            layers.Embedding(max_words, 20, mask_zero=True),
            layers.GRU(20, return_sequences=True),
            layers.Bidirectional(layers.GRU(10)),
            layers.Dense(1),
        ]
    else:  # 'lstm'
        stack = [
            layers.Embedding(max_words, 100, mask_zero=True),
            layers.LSTM(50, return_sequences=True),
            layers.LSTM(50),
            layers.Dropout(0.5),
            layers.Dense(10, activation='relu'),
            layers.Dense(1),
        ]

    model = tf.keras.Sequential(stack)
    model.compile(
        optimizer=tf.optimizers.Adam(),
        loss=tf.losses.BinaryCrossentropy(from_logits=True),
        # The model outputs logits, so the decision boundary is 0 rather than
        # the default 0.5 (which is meant for probabilities).
        metrics=[tf.metrics.BinaryAccuracy(threshold=0.0)],
    )
    return model

In [6]:
# Build the training and test pipelines with length bucketing enabled.
train_data, test_data = preprocess_data(is_bucketing=True)

2023-11-22 04:41:12.002156: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1
2023-11-22 04:41:12.002172: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2023-11-22 04:41:12.002176: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2023-11-22 04:41:12.002363: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-11-22 04:41:12.002377: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [7]:
# Bucketing = True: train the 'bilstm' model on the bucketed datasets.
with tf.device('/CPU:0'):  # create and fit the model on the CPU device
    model = create_model('bilstm')
    model.summary()
    model.fit(train_data, validation_data=test_data, epochs=10)



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, None, 20)          400000    
                                                                 
 bidirectional (Bidirection  (None, None, 24)          3168      
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 10)                1200      
 onal)                                                           
                                                                 
 dense (Dense)               (None, 1)                 11        
                                                                 
Total params: 404379 (1.54 MB)
Trainable params: 404379 (1.54 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/1

In [9]:
# Train the 'gru' model on the same datasets.
with tf.device('/CPU:0'):  # create and fit the model on the CPU device
    model = create_model('gru')
    model.summary()
    model.fit(train_data, validation_data=test_data, epochs=10)



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 20)          400000    
                                                                 
 gru_2 (GRU)                 (None, None, 20)          2520      
                                                                 
 bidirectional_3 (Bidirecti  (None, 20)                1920      
 onal)                                                           
                                                                 
 dense_2 (Dense)             (None, 1)                 21        
                                                                 
Total params: 404461 (1.54 MB)
Trainable params: 404461 (1.54 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8

KeyboardInterrupt: 

In [12]:
# Train the 'lstm' model on the same datasets.
# NOTE(review): the saved summary for this cell shows a 20-dim embedding and
# two 20-unit LSTM layers, which does not match the 'lstm' branch of
# create_model as defined in this notebook -- the output apparently comes from
# an earlier definition and should be regenerated.
with tf.device('/CPU:0'):  # create and fit the model on the CPU device
    model = create_model('lstm')
    model.summary()
    model.fit(train_data, validation_data=test_data, epochs=10)



Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, None, 20)          400000    
                                                                 
 lstm_4 (LSTM)               (None, None, 20)          3280      
                                                                 
 lstm_5 (LSTM)               (None, 20)                3280      
                                                                 
 dense_4 (Dense)             (None, 1)                 21        
                                                                 
Total params: 406581 (1.55 MB)
Trainable params: 406581 (1.55 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [16]:
# Re-run of the 'lstm' configuration; the saved summary shows the larger
# variant (100-dim embedding, 50-unit LSTMs, dropout, Dense(10)).
with tf.device('/CPU:0'):  # create and fit the model on the CPU device
    model = create_model('lstm')
    model.summary()
    model.fit(train_data, validation_data=test_data, epochs=10)



Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_6 (Embedding)     (None, None, 100)         2000000   
                                                                 
 lstm_8 (LSTM)               (None, None, 50)          30200     
                                                                 
 lstm_9 (LSTM)               (None, 50)                20200     
                                                                 
 dropout_1 (Dropout)         (None, 50)                0         
                                                                 
 dense_7 (Dense)             (None, 10)                510       
                                                                 
 dense_8 (Dense)             (None, 1)                 11        
                                                                 
Total params: 2050921 (7.82 MB)
Trainable params: 2050

KeyboardInterrupt: 

In [1]:
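# Observations:
# - Padding whole batches (no bucketing) took longer to train than bucketing by sequence length.
# - The RNN models seem to be overfitting the training data.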