In [1]:
#!pip install numpy==1.19.5

# Sentiment Analysis on IMDB Reviews using LSTM and GRU in Keras

### Steps
- Load the dataset (50K IMDB Movie Review)
- Clean Dataset
- Encode Sentiments
- Split Dataset
- Tokenize and Pad/Truncate Reviews
- Build Architecture/Model
- Train and Test
- Import all the libraries needed 

In [1]:
import pandas as pd    # to load dataset
import numpy as np     # for mathematic equation
from nltk.corpus import stopwords   # to get collection of stopwords
from sklearn.model_selection import train_test_split       # for splitting dataset
from tensorflow.keras.preprocessing.text import Tokenizer  # to encode text to int
from tensorflow.keras.preprocessing.sequence import pad_sequences   # to do padding or truncating
from tensorflow.keras.models import Sequential     # the model
from tensorflow.keras.layers import Embedding, LSTM, Dense # layers of the architecture
from tensorflow.keras.callbacks import ModelCheckpoint   # save model
from tensorflow.keras.models import load_model   # load saved model
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import re
import tensorflow as tf

In [2]:
data = pd.read_csv( './data/IMDb/IMDB Dataset.csv')
print(data)

                                                  review sentiment
0      One of the other reviewers has mentioned that ...  positive
1      A wonderful little production. <br /><br />The...  positive
2      I thought this was a wonderful way to spend ti...  positive
3      Basically there's a family where a little boy ...  negative
4      Petter Mattei's "Love in the Time of Money" is...  positive
...                                                  ...       ...
49995  I thought this movie did a down right good job...  positive
49996  Bad plot, bad dialogue, bad acting, idiotic di...  negative
49997  I am a Catholic taught in parochial elementary...  negative
49998  I'm going to have to disagree with the previou...  negative
49999  No one expects the Star Trek movies to be high...  negative

[50000 rows x 2 columns]


Stop Word is a commonly used words in a sentence, usually a search engine is programmed to ignore this words (i.e. "the", "a", "an", "of", etc.)
Declaring the english stop words 

In [3]:
english_stops = set(stopwords.words('english'))


## Load and Clean Dataset
In the original dataset, the reviews are still dirty. There are still html tags, numbers, uppercase, and punctuations. This will not be good for training, so in load_dataset() function, beside loading the dataset using pandas, I also pre-process the reviews by removing html tags, non alphabet (punctuations and numbers), stop words, and lower case all of the reviews.

## Encode Sentiments
In the same function, I also encode the sentiments into integers (0 and 1). Where 0 is for negative sentiments and 1 is for positive sentiments.

In [4]:
def load_dataset():
    df = pd.read_csv('./data/IMDb/IMDB Dataset.csv')
    x_data = df['review']       # Reviews/Input
    y_data = df['sentiment']    # Sentiment/Output

    # PRE-PROCESS REVIEW
    x_data = x_data.replace({'<.*?>': ''}, regex = True)          # remove html tag
    x_data = x_data.replace({'[^A-Za-z]': ' '}, regex = True)     # remove non alphabet
    x_data = x_data.apply(lambda review: [w for w in review.split() if w not in english_stops])  # remove stop words
    x_data = x_data.apply(lambda review: [w.lower() for w in review])   # lower case
    
    # ENCODE SENTIMENT -> 0 & 1
    y_data = y_data.replace('positive', 1)
    y_data = y_data.replace('negative', 0)

    return x_data, y_data

x_data, y_data = load_dataset()

print('Reviews')
print(x_data, '\n')
print('Sentiment')
print(y_data)

Reviews
0        [one, reviewers, mentioned, watching, oz, epis...
1        [a, wonderful, little, production, the, filmin...
2        [i, thought, wonderful, way, spend, time, hot,...
3        [basically, family, little, boy, jake, thinks,...
4        [petter, mattei, love, time, money, visually, ...
                               ...                        
49995    [i, thought, movie, right, good, job, it, crea...
49996    [bad, plot, bad, dialogue, bad, acting, idioti...
49997    [i, catholic, taught, parochial, elementary, s...
49998    [i, going, disagree, previous, comment, side, ...
49999    [no, one, expects, star, trek, movies, high, a...
Name: review, Length: 50000, dtype: object 

Sentiment
0        1
1        1
2        1
3        0
4        1
        ..
49995    1
49996    0
49997    0
49998    0
49999    0
Name: sentiment, Length: 50000, dtype: int64


## Split Dataset
In this work, I decided to split the data into 80% of Training and 20% of Testing set using train_test_split method from Scikit-Learn. By using this method, it automatically shuffles the dataset. We need to shuffle the data because in the original dataset, the reviews and sentiments are in order, where they list positive reviews first and then negative reviews. By shuffling the data, it will be distributed equally in the model, so it will be more accurate for predictions.

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size = 0.2)

print('Train Set')
print(x_train, '\n')
print(x_test, '\n')
print('Test Set')
print(y_train, '\n')
print(y_test)

Train Set
48330    [haven, played, game, don, bother, this, final...
33365    [to, day, i, never, seen, elizabeth, shue, any...
4920     [leos, carax, brilliant, one, best, film, came...
6206     [one, favorite, movies, ever, along, casablanc...
27202    [good, attempt, tackling, unconventional, topi...
                               ...                        
13542    [s, s, van, dine, must, shrewd, businessman, d...
16524    [what, freaking, problem, do, nothing, better,...
41064    [forget, jaded, comments, come, this, action, ...
2372     [this, movie, definitely, one, finest, kind, a...
18284    [being, fan, silent, films, i, looked, forward...
Name: review, Length: 40000, dtype: object 

41302    [if, want, learn, something, spanish, civil, w...
20106    [forget, every, spy, movie, ever, seen, life, ...
22989    [it, known, whether, marilyn, monroe, ever, me...
13332    [can, scarcely, imagine, better, movie, than, ...
12266    [la, petit, tourette, pretty, funny, south, pa...
 

Function for getting the maximum review length, by calculating the mean of all the reviews length (using numpy.mean)

In [6]:
def get_max_length():
    review_length = []
    for review in x_train:
        review_length.append(len(review))

    return int(np.ceil(np.mean(review_length)))

### Tokenize and Pad/Truncate Reviews¶
A Neural Network only accepts numeric data, so we need to encode the reviews. I use tensorflow.keras.preprocessing.text.Tokenizer to encode the reviews into integers, where each unique word is automatically indexed (using fit_on_texts method) based on x_train.
x_train and x_test is converted into integers using texts_to_sequences method.

Each reviews has a different length, so we need to add padding (by adding 0) or truncating the words to the same length (in this case, it is the mean of all reviews length) using tensorflow.keras.preprocessing.sequence.pad_sequences.

post, pad or truncate the words in the back of a sentence
pre, pad or truncate the words in front of a sentence

In [7]:
# ENCODE REVIEW
token = Tokenizer(lower=False)    # no need lower, because already lowered the data in load_data()
token.fit_on_texts(x_train)
x_train = token.texts_to_sequences(x_train)
x_test = token.texts_to_sequences(x_test)

max_length = get_max_length()

x_train = pad_sequences(x_train, maxlen=max_length, padding='post', truncating='post')
x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')

total_words = len(token.word_index) + 1   # add 1 because of 0 padding

print('Encoded X Train\n', x_train, '\n')
print('Encoded X Test\n', x_test, '\n')
print('Maximum review length: ', max_length)

Encoded X Train
 [[10974   162   352 ...  6144 13746  6748]
 [  282   156     1 ...     0     0     0]
 [23836 10480   422 ...     0     0     0]
 ...
 [  724  5826   709 ...     0     0     0]
 [    8     3   310 ...     0     0     0]
 [ 2093   238  1205 ...     0     0     0]] 

Encoded X Test
 [[   55    88   744 ...   156    11  7637]
 [  724    83  2513 ...     0     0     0]
 [    7   460   606 ...  5760   110 11843]
 ...
 [   52   192   629 ...   136     6   114]
 [   39   356  1819 ...    14  4738    10]
 [    8  1032  1502 ...    88   265   152]] 

Maximum review length:  130



### Build Architecture/Model
Embedding Layer: in simple terms, it creates word vectors of each word in the word_index and group words that are related or have similar meaning by analyzing other words around them.

LSTM Layer: to make a decision to keep or throw away data by considering the current input, previous output, and previous memory. There are some important components in LSTM.

Forget Gate, decides information is to be kept or thrown away
Input Gate, updates cell state by passing previous output and current input into sigmoid activation function
Cell State, calculate new cell state, it is multiplied by forget vector (drop value if multiplied by a near 0), add it with the output from input gate to update the cell state value.
Ouput Gate, decides the next hidden state and used for predictions
Dense Layer: compute the input with the weight matrix and bias (optional), and using an activation function. I use Sigmoid activation function for this work because the output is only 0 or 1.

The optimizer is Adam and the loss function is Binary Crossentropy because again the output is only 0 and 1, which is a binary number.

In [8]:
inputs = keras.Input(shape=(None,), dtype='int32')
x = layers.Embedding(total_words, 128)(inputs)
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(64))(x)

outputs = layers.Dense(1,activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding (Embedding)        (None, None, 128)         11838976  
_________________________________________________________________
bidirectional (Bidirectional (None, None, 128)         98816     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 128)               98816     
_________________________________________________________________
dense (Dense)                (None, 1)                 129       
Total params: 12,036,737
Trainable params: 12,036,737
Non-trainable params: 0
_________________________________________________________________


In [9]:
# ARCHITECTURE
''' EMBED_DIM = 32
LSTM_OUT = 64

model = Sequential()
model.add(Embedding(total_words, EMBED_DIM, input_length = max_length))
model.add(LSTM(LSTM_OUT))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

print(model.summary())'''

" EMBED_DIM = 32\nLSTM_OUT = 64\n\nmodel = Sequential()\nmodel.add(Embedding(total_words, EMBED_DIM, input_length = max_length))\nmodel.add(LSTM(LSTM_OUT))\nmodel.add(Dense(1, activation='sigmoid'))\nmodel.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])\n\nprint(model.summary())"

In [10]:
model_name = 'lstm-imdb'
training_round = 10

In [11]:
lr=0.001
opt = keras.optimizers.Adam(learning_rate=lr)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [12]:
import time
import os
import copy
import csv
import pandas as pd
from datetime import datetime

date = datetime.today().strftime('%Y-%m-%d')

In [13]:
check_point = keras.callbacks.ModelCheckpoint(filepath="./data/26-10-2021/Train1/Keras/lstm/tf_{}_{}_{}.h5".format(model_name,date, training_round),
                                              monitor="val_accuracy",
                                              mode="max",
                                              save_best_only=True,)

In [14]:
epochs = 50
batch_size = 128

In [15]:
%%time
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
                        validation_data=(x_test, y_test),
                        callbacks=[check_point])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
CPU times: user 1h 56min 32s, sys: 37min 6s, total: 2h 33min 38s
Wall time: 44min 59s


In [16]:
def export_history_csv(history_, model_name):
    since = time.time()
   
    data_file = open('./data/26-10-2021/Train1/Keras/lstm/tf_{}_{}_{}.csv'.format(model_name, date, training_round), mode='w+', newline='', encoding='utf-8')
    data_writer = csv.writer(data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    data_writer.writerow(['Model','type', 'Dataset', 'Epoch', 'criterion', 'optimizer', 'scheduler','Train_loss', 'Train_acc', "val_loss", "Val_acc", 'time','Elapse_time','date'])
    for epoch_ in history_.epoch:
        data_writer.writerow([history_.model,'tensorflow', 'hymenoptera', epoch_, '', 
                          history_.model.optimizer, '',history_.history['loss'][epoch_], history_.history['accuracy'][epoch_], 
                          history_.history['val_loss'][epoch_], history_.history['val_accuracy'][epoch_], '','',date])
    data_file.close()

In [17]:
export_history_csv(history, model_name)

In [18]:
model.save("./data/26-10-2021/Train1/Keras/lstm/tf_{}_{}_{}.h5".format(model_name, date, training_round))

In [32]:
size1 = os.path.getsize("./data/23-08-2021/tf_{}.h5".format(model_name))
size1

size2 = os.path.getsize("./data/23-08-2021/torch_LSTM-IMDb.pth")
print(size1, size2)

144762352 3685471


In [10]:
path = './data/23-08-2021/'
since_0 = time.time()
#model_path = 'tf_Lenet5_mnist_2021-08-24-10:35:35'
model_name = 'tf_lstm-imdb_2021-09-11-16:49:06'
model = tf.keras.models.load_model(path+ model_name+'.h5')
t_elapsed_0 = time.time() - since_0
size0 = os.path.getsize(path+ model_name+'.h5')
size0

144762352

In [11]:
from torch.utils.data import TensorDataset, DataLoader
import torch

In [12]:
# create Tensor datasets
train_data = TensorDataset(torch.as_tensor(np.array(x_train).astype('int32')), torch.as_tensor(np.array(y_train).astype('int32')))
valid_data = TensorDataset(torch.as_tensor(np.array(x_test).astype('int32')), torch.as_tensor(np.array(y_test).astype('int32')))

# dataloaders
batch_size = 200

# make sure to SHUFFLE your data
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)

In [13]:
import onnx
import onnxruntime
import time
import tf2onnx

In [14]:
def to_onnx(i, x, abs_errors,rel_errors, t0_list, t1_list, t2_list, t3_list, s_list):
    # Input to the model
    #device_reset = cuda.get_current_device()
    #device_reset.reset()
    #x.cuda()
   
    print("converting for batch: ", i)
    
    #torch.random.manual_seed(42)
    #x = torch.randn(10000, 3, 32, 32, requires_grad=True)
    since_1 = time.time()
    #model = torch.load(path+model_name+'.pth')
    with tf.device('/cpu:0'):  
        k_predict = model.predict(x)
    t_elapsed_1 = time.time() - since_1
    # Export the model
    since_1 = time.time()
    model_proto, external_tensor_storage = tf2onnx.convert.from_keras(model,
                input_signature=None, opset=11, custom_ops=None,
                custom_op_handlers=None, custom_rewriter=None,
                inputs_as_nchw=None, extra_opset=None, shape_override=None,
                 target=None, large_model=False, output_path='./data/ONNX/keras/keras-{}.onnx'.format(model_name))
    t_elapsed_2 = time.time() - since_1
    
    
    onnx_model = onnx.load("./data/ONNX/keras/keras-{}.onnx".format(model_name))
    onnx.checker.check_model(onnx_model)
    size2 = os.path.getsize("./data/ONNX/keras/keras-{}.onnx".format(model_name))
    s_list.append(size2)
    #def to_numpy(tensor):
    #    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
    ort_session = onnxruntime.InferenceSession("./data/ONNX/keras/keras-{}.onnx".format(model_name))
    since_1 = time.time()
    ort_inputs = {ort_session.get_inputs()[0].name: x}
    ort_outs = ort_session.run(None, ort_inputs)
    t_elapsed_3 = time.time() - since_1
    # compare ONNX Runtime and PyTorch results
    print("\n*********\n\n")
    #time_diff = t_elapsed_0+t_elapsed_1, t_elapsed_2, t_elapsed_3
    t0_list.append(t_elapsed_0)
    t1_list.append(t_elapsed_1)
    t2_list.append(t_elapsed_2)
    t3_list.append(t_elapsed_3)
    abs_err = np.absolute(k_predict-ort_outs[0])
    rel_err = np.absolute(k_predict-ort_outs[0])/ np.absolute(ort_outs[0])
    abs_errors.append(abs_err)
    rel_errors.append(rel_err)
    
    return (abs_err, rel_err)

In [15]:
def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
def _lets_convert(data):
    since = time.time()
    list_converted = []
    t0_list = []
    t1_list = []
    t2_list = []
    t3_list = []
    s_list = []
    abs_errors = []
    rel_errors = []
    for i, (inputs, labels) in enumerate(data):
        #torch.cuda.empty_cache()
        #images = images.cuda()
        inputs = to_numpy(inputs)
        labels = to_numpy(labels)
        list_converted.append(to_onnx(i, inputs, abs_errors,rel_errors, t0_list, t1_list, t2_list, t3_list, s_list))
        if i == 8:
            break
    time_elapsed = time.time() - since
    print('Conversion complete in {:.0f}m {:.0f}s,  Loading Pytorch: {}, Pytorch time: {:.4f}, conversion time: {:.4f}, onnx runtime: {:.4f}, onnx filesize: {}'.format(
        time_elapsed // 60, time_elapsed % 60, np.mean(np.array(t0_list)), np.mean(np.array(t1_list)), np.mean(np.array(t2_list)), np.mean(np.array(t3_list)), np.mean(np.array(s_list))) )
    
    return list_converted, abs_errors, rel_errors, 'Conversion complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60), np.mean(np.array(t0_list)), np.mean(np.array(t1_list)), np.mean(np.array(t2_list)), np.mean(np.array(t3_list)), np.mean(np.array(s_list))

In [16]:
model_name = 'LSTM-keras'
import pandas as pd 
for batch_size in [1, 5,10,20,30,40,50,60,70,80,90,100,128, 150,200, 250, 300]:
    print("################ Batch size: ", batch_size)
    # create Tensor datasets
    train_data = TensorDataset(torch.as_tensor(np.array(x_train).astype('int32')), torch.as_tensor(np.array(y_train).astype('int32')))
    valid_data = TensorDataset(torch.as_tensor(np.array(x_test).astype('int32')), torch.as_tensor(np.array(y_test).astype('int32')))

    # dataloaders
    #batch_size = batch_size_

    # make sure to SHUFFLE your data
    train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(valid_data, shuffle=True, batch_size=batch_size)

    #list_converted = _lets_convert(test_ds)
    list_converted, abs_errors, rel_errors, total_time, t0, t1, t2, t3, file_size  = _lets_convert(valid_loader)
    
    
    for i in range(len(abs_errors)):
        if i == 0:
            abs_array = abs_errors[i]
            rel_array = rel_errors[i]
        else:
            np.append(abs_array, abs_errors[i])
            np.append(rel_array, rel_errors[i])

    abs_list = []
    rel_list = []
    model_list = []
    batch_list = []
    summary_list = ['Modelsize:{}, Conversion: {}, Loading: {}, t1: {}, conversion time: {}, onnx runtime: {}, onnx filesize: {}'.format(size0, total_time, t0, t1, t2, t3, file_size)]
    for i in range(len(abs_array)):
        abs_list.append(abs_array[i][0])
        rel = rel_array[i][0]
        if rel == np.inf or rel == -np.inf:
            rel = 0.0
        rel_list.append(rel)
        batch_list.append(batch_size)
        model_list.append(model_name)
        if i >= len(summary_list):
            summary_list.append('')
    print(len(summary_list), len(rel_list))
    data = pd.DataFrame({'model':model_list,'batch_size': batch_list, 'abs_errors':abs_list, 'rel_errors':rel_list, 'summary': summary_list})
    data.to_csv('./data/errors/keras2/tf_errors_{}_{}.csv'.format(model_name, batch_size))

################ Batch size:  1
converting for batch:  0
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`

*********


converting for batch:  1

*********


converting for batch:  2

*********


converting for batch:  3

*********


converting for batch:  4

*********


converting for batch:  5

*********


converting for batch:  6

*********


converting for batch:  7

*********


converting for batch:  8

*********


Conversion complete in 2m 17s,  Loading Pytorch: 1.5919651985168457, Pytorch time: 0.2772, conversion time: 14.6218, onnx runtime: 0.0295, onnx filesize: 48256769.777777776
1 1
################ Batch size:  5
converting for batch:  0

*********


converting for batch:  1

*********


converting for batch:  2

*********


converting for batch:  3

*********


converting for batch:  4

*********


converting for batch:  5

*********


converting for batch:  6

*********


converting for batch:  7


  rel_err = np.absolute(k_predict-ort_outs[0])/ np.absolute(ort_outs[0])



*********


converting for batch:  8

*********


Conversion complete in 2m 31s,  Loading Pytorch: 1.5919651985168457, Pytorch time: 0.1097, conversion time: 16.3419, onnx runtime: 0.0773, onnx filesize: 48256975.44444445
5 5
################ Batch size:  10
converting for batch:  0

*********


converting for batch:  1

*********


converting for batch:  2

*********


converting for batch:  3

*********


converting for batch:  4

*********


converting for batch:  5

*********


converting for batch:  6

*********


converting for batch:  7

*********


converting for batch:  8

*********


Conversion complete in 2m 53s,  Loading Pytorch: 1.5919651985168457, Pytorch time: 0.1361, conversion time: 18.6987, onnx runtime: 0.1009, onnx filesize: 48257045.777777776
10 10
################ Batch size:  20
converting for batch:  0

*********


converting for batch:  1

*********


converting for batch:  2

*********


converting for batch:  3

*********


converting for batch:  4

********


*********


converting for batch:  6

*********


converting for batch:  7

*********


converting for batch:  8

*********


Conversion complete in 3m 44s,  Loading Pytorch: 1.5919651985168457, Pytorch time: 0.7957, conversion time: 22.1322, onnx runtime: 1.4757, onnx filesize: 48257270.777777776
300 300


## GRU Model

In [19]:
inputs = keras.Input(shape=(None,), dtype='int32')
x = layers.Embedding(total_words, 128)(inputs)
x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.GRU(64))(x)
x = layers.Dense(24,activation="relu")(x)
outputs = layers.Dense(1,activation="sigmoid")(x)
gru_model = keras.Model(inputs, outputs)
gru_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, None)]            0         
_________________________________________________________________
embedding_1 (Embedding)      (None, None, 128)         11838976  
_________________________________________________________________
bidirectional_2 (Bidirection (None, None, 128)         74496     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 128)               74496     
_________________________________________________________________
dense_1 (Dense)              (None, 24)                3096      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 25        
Total params: 11,991,089
Trainable params: 11,991,089
Non-trainable params: 0
_______________________________________________

In [20]:
gru_model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

In [21]:
model_name = 'gru-imdb'
date = datetime.today().strftime('%Y-%m-%d')

In [22]:
check_point = keras.callbacks.ModelCheckpoint(filepath="./data/26-10-2021/Train1/Keras/gru/tf_{}_{}_{}.h5".format(model_name, date, training_round),
                                              monitor="val_accuracy",
                                              mode="max",
                                              save_best_only=True,)

In [None]:
%%time
history2 = gru_model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1,
                        validation_data=(x_test, y_test),
                        callbacks=[check_point])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50

In [None]:
def export_history_csv(history_, model_name):
    since = time.time()
   
    data_file = open('./data/26-10-2021/Train1/Keras/gru/tf_{}_{}_{}.csv'.format(model_name, date, training_round), mode='w+', newline='', encoding='utf-8')
    data_writer = csv.writer(data_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    data_writer.writerow(['Model','type', 'Dataset', 'Epoch', 'criterion', 'optimizer', 'scheduler','Train_loss', 'Train_acc', "val_loss", "Val_acc", 'time','Elapse_time','date'])
    for epoch_ in history_.epoch:
        data_writer.writerow([history_.model,'tensorflow', 'hymenoptera', epoch_, '', 
                          history_.model.optimizer, '',history_.history['loss'][epoch_], history_.history['accuracy'][epoch_], 
                          history_.history['val_loss'][epoch_], history_.history['val_accuracy'][epoch_], '','',date])
    data_file.close()

In [None]:
export_history_csv(history2, model_name)

In [None]:
gru_model.save("./data/26-10-2021/Train1/Keras/gru/tf_{}_{}_{}.h5".format(model_name, date, training_round))