In [69]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [70]:
import re
import pickle
import numpy as np
import time
import random
import joblib
import matplotlib.pyplot as plt

from keras.preprocessing.text import Tokenizer
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import TfidfVectorizer
from keras.models import Model
from tensorflow.keras.layers import Input
from keras.layers import Embedding, Dense, Dropout, LSTM, Bidirectional, TimeDistributed, InputLayer,SimpleRNN
from tensorflow.keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import Sequence
from keras.initializers import glorot_normal
from keras.callbacks import ModelCheckpoint

In [71]:
import tensorflow as tf
print("GPU available:", tf.test.is_gpu_available())
print("GPU device name:", tf.test.gpu_device_name())

GPU available: True
GPU device name: /device:GPU:0


In [72]:
with open('/content/drive/My Drive/NLPdata/train.txt','r') as file:
    train_data = file.readlines()

val_data_raw = None
with open('/content/drive/My Drive/NLPdata/val.txt','r') as file:
    val_data = file.readlines()
print('Training data length:', len(train_data))
print('Validation data length:', len(val_data))

Training data length: 50000
Validation data length: 2500


In [73]:
diacritics = ['َ', 'ً', 'ُ', 'ٌ', 'ِ', 'ٍ', 'ْ', 'ّ', 'َّ', 'ًّ', 'ُّ', 'ٌّ', 'ِّ', 'ٍّ', '']

arabic_chars = ['ئ', 'ط', 'ه', 'ك', 'ض', 'ج', 'ذ', 'ع', 'ب', 'ل', 'د', 'ت', 'ا', 'ث', 'أ', 'س', 'ق', 'م', 'ش', 'ز', 'غ', 'ى', 'إ', 'خ', 'ن', 'آ', 'ؤ', 'ي', 'ظ', 'ص', 'ح', 'ة', 'و', 'ر', 'ء', 'ف']
arabic_chars_space = list(arabic_chars) + [' ']
arabic_chars_valid = list(arabic_chars) + [' '] + diacritics

# char_mapping = {' ': 0,
#     'ا': 1, 'ب': 2, 'ت': 3, 'ث': 4, 'ج': 5, 'ح': 6, 'خ': 7, 'د': 8, 'ذ': 9, 'ر': 10, 'ز': 11, 'س': 12, 'ش': 13, 'ص': 14,
#     'ض': 15, 'ط': 16, 'ظ': 17, 'ع': 18, 'غ': 19, 'ف': 20, 'ق': 21, 'ك': 22, 'ل': 23, 'م': 24, 'ن': 25, 'ه': 26, 'و': 27,
#     'ى': 28, 'ي': 29,'ء': 30, 'آ': 31, 'أ': 32, 'ؤ': 33, 'إ': 34, 'ئ': 35,'ة': 36,
#     '٠': 37, '١': 38, '٢': 39, '٣': 40, '٤': 41, '٥': 42, '٦': 43, '٧': 44, '٨': 45, '٩': 46,
#     '0': 47, '1': 48, '2': 49, '3': 50, '4': 51, '5': 52, '6': 53,'7': 54, '8': 55, '9': 56,
#     '<pad>': 57, '<s>': 58, '</s>': 59
# }

char_mapping = {' ': 0,
    'ا': 1, 'ب': 2, 'ت': 3, 'ث': 4, 'ج': 5, 'ح': 6, 'خ': 7, 'د': 8, 'ذ': 9, 'ر': 10, 'ز': 11, 'س': 12, 'ش': 13, 'ص': 14,
    'ض': 15, 'ط': 16, 'ظ': 17, 'ع': 18, 'غ': 19, 'ف': 20, 'ق': 21, 'ك': 22, 'ل': 23, 'م': 24, 'ن': 25, 'ه': 26, 'و': 27,
    'ى': 28, 'ي': 29,'ء': 30, 'آ': 31, 'أ': 32, 'ؤ': 33, 'إ': 34, 'ئ': 35,'ة': 36,
    '٠': 37, '١': 38, '٢': 39, '٣': 40, '٤': 41, '٥': 42, '٦': 43, '٧': 44, '٨': 45, '٩': 46,
    '0': 47, '1': 48, '2': 49, '3': 50, '4': 51, '5': 52, '6': 53,'7': 54, '8': 55, '9': 56,
    '<pad>': 57, '<s>': 58, '</s>': 59,
   '.':60,',':61,'،': 62,':':63,';':64,'؛':65,'(':66,')':67,'[': 68,']':69,'{': 70,'}': 71,'«': 72,'»': 73,'-': 74, '!': 75, '?': 76,'؟': 77,
    '\n': 78, '"': 79, '&': 80, "'": 81, '*': 82, '+': 83, '/': 84, '=': 85,  '_': 86, '`': 87, '~': 88,'\u200d': 89, '\u200f': 90, '–': 91,
    '’': 92, '“': 93, '…': 94, '﴾': 95, '﴿': 96
}

class_mapping = {'َ': 0, 'ً': 1, 'ُ': 2, 'ٌ': 3, 'ِ': 4, 'ٍ': 5, 'ْ': 6, 'ّ': 7, 'َّ': 8, 'ًّ':
9, 'ُّ': 10, 'ٌّ': 11, 'ِّ': 12, 'ٍّ': 13, '': 14}

reverse_class_mapping = {0:'َ', 1:'ً', 2:'ُ', 3:'ٌ', 4:'ِ', 5:'ٍ', 6:'ْ',7:'ّ',8: 'َّ',9: 'ًّ',10: 'ُّ',11: 'ٌّ',12: 'ِّ',13: 'ٍّ',14: ''}


punctionations_splitting ={'.':'.\n',',':',\n','،': '،\n',':':':\n',';':';\n','؛':'؛\n','(':'\n(',')':')\n',
                           '[': '\n[',']':']\n','{': '\n{','}': '}\n','«': '\n«','»': '»\n',
                           '-': '-\n', '!': '!\n', '?': '?\n', '؟': '؟\n',}

In [74]:
def remove_diacritics(data):
    return data.translate(str.maketrans('', '', ''.join(diacritics)))

test_str = 'قَوْلُهُ : ( أَوْ قَطَعَ الْأَوَّلُ يَدَهُ إلَخْ ) قَالَ الزَّرْكَشِيُّ'
print(remove_diacritics(test_str))


قوله : ( أو قطع الأول يده إلخ ) قال الزركشي


In [75]:
def one_hot_matrix(data, size):
    one_hot_matrix = [[1 if j == i else 0 for j in range(size)] for i in data]
    return one_hot_matrix

test = [0,1,2,3]
print(one_hot_matrix(test, 4))

[[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]]


In [76]:
def one_hot_vector(index , size):
    one_hot_vector = [1 if j == index else 0 for j in range(size)]
    return one_hot_vector

one_hot_vector(1,5)

[0, 1, 0, 0, 0]

In [77]:
def split_using_punctuation(data):

  splitted_data = list()

  for sentence in data:
        for punc in punctionations_splitting:
          sentence = sentence.replace(punc, punctionations_splitting[punc])
        splitted_data += sentence.split('\n')

  return splitted_data

print(split_using_punctuation(train_data[0:2]))

['قَوْلُهُ :', ' ', '( أَوْ قَطَعَ الْأَوَّلُ يَدَهُ إلَخْ )', ' قَالَ الزَّرْكَشِيُّ', '( 14 / 123 )', '', '', 'ابْنُ عَرَفَةَ :', ' قَوْلُهُ :', ' بِلَفْظٍ يَقْتَضِيه كَإِنْكَارِ غَيْرِ حَدِيثٍ بِالْإِسْلَامِ وُجُوبَ مَا عُلِمَ وُجُوبُهُ مِنْ الدِّينِ ضَرُورَةً ', '( كَإِلْقَاءِ مُصْحَفٍ بِقَذَرٍ وَشَدِّ زُنَّارٍ )', ' ابْنُ عَرَفَةَ :', ' قَوْلُ ابْنِ شَاسٍ :', ' أَوْ بِفِعْلٍ يَتَضَمَّنُهُ هُوَ كَلُبْسِ الزُّنَّارِ وَإِلْقَاءِ الْمُصْحَفِ فِي صَرِيحِ النَّجَاسَةِ وَالسُّجُودِ لِلصَّنَمِ وَنَحْوِ ذَلِكَ ', '( وَسِحْرٍ )', ' مُحَمَّدٌ :', ' قَوْلُ مَالِكٍ وَأَصْحَابِهِ أَنَّ السَّاحِرَ كَافِرٌ بِاَللَّهِ تَعَالَى قَالَ مَالِكٌ :', ' هُوَ كَالزِّنْدِيقِ إذَا عَمِلَ السِّحْرَ بِنَفْسِهِ قُتِلَ وَلَمْ يُسْتَتَبْ .', '', '']


In [78]:
def split_on_length(data):

    max_len = 500

    splitted_data = list()

    for sentence in data:

       new_sentence = remove_diacritics(sentence).strip()

       if len(new_sentence) != 0:

          if len(new_sentence) > 0 and len(new_sentence) <= max_len:
                  splitted_data.append(sentence.strip())

          else:
            sentence_words = sentence.split()
            temp_sentence = ''

            for word in sentence_words:


              # if we add the word, it will exceed length, so don't add this word and take the sentence
              if len(remove_diacritics(temp_sentence).strip()) + len(remove_diacritics(word).strip()) + 1 > max_len:
                  if len(remove_diacritics(temp_sentence).strip()) > 0:
                      splitted_data.append(temp_sentence.strip())

                  # make a new sentence
                  temp_sentence = word

              else:
                  # it will not exceed, add the word to the sentence
                  temp_sentence = word if temp_sentence == '' else temp_sentence + ' ' + word

            if len(remove_diacritics(temp_sentence).strip()) > 0:
                  splitted_data.append(temp_sentence.strip())

    return splitted_data

In [79]:
split_punctuation_train_data = split_using_punctuation(train_data)
split_length_train_data      = split_on_length(split_punctuation_train_data)

split_punctuation_val_data = split_using_punctuation(val_data)
split_length_val_data      = split_on_length(split_punctuation_val_data)


print('Training data length:', len(split_length_train_data))
print('Validation data length:', len(split_length_val_data))

print(split_length_train_data[0:5])

Training data length: 305772
Validation data length: 15701
['قَوْلُهُ :', '( أَوْ قَطَعَ الْأَوَّلُ يَدَهُ إلَخْ )', 'قَالَ الزَّرْكَشِيُّ', '( 14 / 123 )', 'ابْنُ عَرَفَةَ :']


In [80]:
print('Training data max:', max(len(remove_diacritics(item).strip()) for item in split_length_train_data))
print('Validation data max:', max(len(remove_diacritics(item).strip()) for item in split_length_val_data))

print('Training data min:', min(len(remove_diacritics(item).strip()) for item in split_length_train_data))
print('Validation data min:', min(len(remove_diacritics(item).strip()) for item in split_length_val_data))

Training data max: 500
Validation data max: 500
Training data min: 1
Validation data min: 1


In [81]:
# list of short sentences -> with diarictic & without punc or numbers
clean_diac_train_data = [(''.join(char for char in text if char in arabic_chars_valid)).strip() for text in split_length_train_data]
clean_diac_val_data = [(''.join(char for char in text if char in arabic_chars_valid)).strip() for text in split_length_val_data]

clean_diac_train_data = [item for item in clean_diac_train_data if item != ""]
clean_diac_val_data = [item for item in clean_diac_val_data if item != ""]

print('Training data length:', len(clean_diac_train_data))
print('Validation data length:', len(clean_diac_val_data))

print(clean_diac_train_data[0:5])

Training data length: 280228
Validation data length: 14385
['قَوْلُهُ', 'أَوْ قَطَعَ الْأَوَّلُ يَدَهُ إلَخْ', 'قَالَ الزَّرْكَشِيُّ', 'ابْنُ عَرَفَةَ', 'قَوْلُهُ']


In [82]:
# list of short sentences -> without diarictic & without punc or numbers

clean_train_data = [remove_diacritics(text) for text in clean_diac_train_data]
clean_val_data = [remove_diacritics(text) for text in clean_diac_val_data]

print('Training data length:', len(clean_train_data))
print('Validation data length:', len(clean_val_data))

print(clean_train_data[0:5])

Training data length: 280228
Validation data length: 14385
['قوله', 'أو قطع الأول يده إلخ', 'قال الزركشي', 'ابن عرفة', 'قوله']


In [83]:
def get_sentence_classes(sentence):

  x = []
  y = []

  x.append(char_mapping['<s>'])
  y.append(one_hot_vector(class_mapping[''],len(class_mapping)))

  for index, char in enumerate(sentence):

    if char not in diacritics: # arabic char or space

      x.append(char_mapping[char])

      char_diacritic = ''
      sentence_len = len(sentence)

      if index + 1 < sentence_len:
        if sentence[index + 1] in diacritics:
          char_diacritic = sentence[index + 1]

          if index + 2 < sentence_len:
            char_diacritic = char_diacritic + sentence[index + 2] if sentence[index + 2] in diacritics and (char_diacritic + sentence[index + 2] in class_mapping) else sentence[index + 2] + char_diacritic if sentence[index + 2] in diacritics and (sentence[index + 2] + char_diacritic in class_mapping) else char_diacritic

      y.append(one_hot_vector(class_mapping[char_diacritic],len(class_mapping)))

  x.append(char_mapping['</s>'])
  y.append(one_hot_vector(class_mapping[''],len(class_mapping)))

  assert(len(x) == len(y))

  return x, y

In [84]:
def get_classes(data):

  X = []
  Y = []

  for sentence in data:
    x, y = get_sentence_classes(sentence)
    X.append(x)
    Y.append(y)

  X = np.asarray(X)
  Y = np.asarray(Y)

  return X, Y

In [85]:
print(clean_diac_train_data[0:5])

X,Y = get_classes(clean_diac_train_data[0:5])

print(X.shape)
print(Y.shape)
print(len(X[0]))
print(len(Y[0]))

['قَوْلُهُ', 'أَوْ قَطَعَ الْأَوَّلُ يَدَهُ إلَخْ', 'قَالَ الزَّرْكَشِيُّ', 'ابْنُ عَرَفَةَ', 'قَوْلُهُ']
(5,)
(5,)
6
6


  X = np.asarray(X)
  Y = np.asarray(Y)


In [86]:
class custom_data_generator(Sequence):

    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        # self.vec_len = 391

    def __len__(self):
        return int(np.ceil(len(self.data) / float(self.batch_size)))

    def __getitem__(self, index):

        start_index = index * self.batch_size
        end_index = (index + 1) * self.batch_size

        batch = self.data[start_index : end_index]
        X_batch, Y_batch = get_classes(batch)

        max_length_X = np.max([len(x) for x in X_batch])
        max_length_Y = np.max([len(y) for y in Y_batch])

        assert(max_length_X == max_length_Y)

        X = []
        for x in X_batch:
          padding_length = max_length_X - len(x)
          x = list(x)
          x.extend([char_mapping['<pad>']] * (padding_length))
          X.append(np.asarray(x))

        Y = []
        for y in Y_batch:
          padding_length = max_length_Y - len(y)
          y = list(y)
          y.extend([one_hot_vector(class_mapping[''],len(class_mapping))] * (padding_length))
          Y.append(np.asarray(y))

        X, Y = np.asarray(X), np.asarray(Y)

        # print('===================================> X:', X.shape)
        # print('===================================> Y:', Y.shape)

        return X, Y

In [87]:
def build_model():

   model = Sequential()
  #  model.add(InputLayer(input_shape=(None, len(char_mapping))))

   model.add( Embedding(input_dim=len(char_mapping),output_dim=25,embeddings_initializer=glorot_normal(seed=500)))

   model.add(SimpleRNN(units=256,return_sequences=True,kernel_initializer=glorot_normal(seed=500)))
   model.add(Dropout(0.5))
   model.add(SimpleRNN(units=256,return_sequences=True,kernel_initializer=glorot_normal(seed=500)))
   model.add(Dropout(0.5))
   model.add(SimpleRNN(units=256,return_sequences=True,kernel_initializer=glorot_normal(seed=500)))
   model.add(TimeDistributed(Dense(units=512,activation='relu',kernel_initializer=glorot_normal(seed=500))))
   model.add(TimeDistributed(Dense(units=512,activation='relu',kernel_initializer=glorot_normal(seed=500))))
   model.add(TimeDistributed(Dense(units=len(class_mapping),activation='softmax',kernel_initializer=glorot_normal(seed=500))))
   model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
   return model

In [88]:
model = build_model()
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, None, 25)          2425      
                                                                 
 simple_rnn_5 (SimpleRNN)    (None, None, 256)         72192     
                                                                 
 dropout_4 (Dropout)         (None, None, 256)         0         
                                                                 
 simple_rnn_6 (SimpleRNN)    (None, None, 256)         131328    
                                                                 
 dropout_5 (Dropout)         (None, None, 256)         0         
                                                                 
 simple_rnn_7 (SimpleRNN)    (None, None, 256)         131328    
                                                                 
 time_distributed_6 (TimeDi  (None, None, 512)        

In [89]:
def fit_model(model, epochs, batch_size, train_data, val_data):

    random.shuffle(train_data)
    random.shuffle(val_data)

    train_data = list(sorted(train_data, key=lambda item: len(remove_diacritics(item))))
    val_data   = list(sorted(val_data,   key=lambda item: len(remove_diacritics(item))))

    checkpoint_path = '/content/drive/My Drive/NLPdata/checkpoints/epoch{epoch:02d}.ckpt'
    checkpoint_cb = ModelCheckpoint(checkpoint_path, verbose=0)

    training_generator = custom_data_generator(train_data, batch_size)
    val_generator = custom_data_generator(val_data, batch_size)

    history =  model.fit(training_generator,validation_data=val_generator,epochs=epochs,callbacks=[checkpoint_cb])
    return history

In [90]:
start_time = time.time()

history =fit_model(model, 50, 256, clean_diac_train_data, clean_diac_val_data)
end_time = time.time()

training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

print('Final Training Accuracy:', training_accuracy[-1])
print('Final Validation Accuracy:', validation_accuracy[-1])

print('%s seconds' % round(end_time - start_time, 2))

Epoch 1/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 2/50
   1/1095 [..............................] - ETA: 3:58 - loss: 0.5406 - accuracy: 0.8003

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 3/50
   3/1095 [..............................] - ETA: 2:09 - loss: 0.4652 - accuracy: 0.8203

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 4/50
   1/1095 [..............................] - ETA: 2:00 - loss: 0.4488 - accuracy: 0.8363

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 5/50
  23/1095 [..............................] - ETA: 2:09 - loss: 0.3850 - accuracy: 0.8390

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 6/50
   1/1095 [..............................] - ETA: 4:23 - loss: 0.4436 - accuracy: 0.8367

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 7/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 8/50
   1/1095 [..............................] - ETA: 2:25 - loss: 0.3891 - accuracy: 0.8570

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 9/50
   6/1095 [..............................] - ETA: 2:43 - loss: 0.3615 - accuracy: 0.8537

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 10/50
   5/1095 [..............................] - ETA: 4:12 - loss: 0.3890 - accuracy: 0.8465

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 11/50
   1/1095 [..............................] - ETA: 4:20 - loss: 0.3778 - accuracy: 0.8611

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 12/50
   1/1095 [..............................] - ETA: 4:22 - loss: 0.3909 - accuracy: 0.8526

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 13/50
   1/1095 [..............................] - ETA: 6:55 - loss: 0.3916 - accuracy: 0.8510

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 14/50
   1/1095 [..............................] - ETA: 1:40 - loss: 0.3814 - accuracy: 0.8540

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 15/50
   6/1095 [..............................] - ETA: 3:27 - loss: 0.3578 - accuracy: 0.8510

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 16/50
  12/1095 [..............................] - ETA: 3:03 - loss: 0.3440 - accuracy: 0.8613

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 17/50
  11/1095 [..............................] - ETA: 2:23 - loss: 0.3533 - accuracy: 0.8595

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 18/50
   3/1095 [..............................] - ETA: 1:28 - loss: 0.2945 - accuracy: 0.8663

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 19/50
   1/1095 [..............................] - ETA: 11:37 - loss: 0.3540 - accuracy: 0.8652

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 20/50
   8/1095 [..............................] - ETA: 2:16 - loss: 0.3008 - accuracy: 0.8626

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 21/50
   7/1095 [..............................] - ETA: 1:18 - loss: 0.3304 - accuracy: 0.8648

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 22/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 23/50
   2/1095 [..............................] - ETA: 2:25 - loss: 0.3357 - accuracy: 0.8660

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 24/50
   2/1095 [..............................] - ETA: 2:26 - loss: 0.3576 - accuracy: 0.8625

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 25/50
   1/1095 [..............................] - ETA: 1:30 - loss: 0.3522 - accuracy: 0.8608

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 26/50
   8/1095 [..............................] - ETA: 1:18 - loss: 0.3206 - accuracy: 0.8696

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 27/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 28/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 29/50
   8/1095 [..............................] - ETA: 4:19 - loss: 0.3577 - accuracy: 0.8623

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 30/50
   2/1095 [..............................] - ETA: 1:41 - loss: 0.2529 - accuracy: 0.8963

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 31/50
   2/1095 [..............................] - ETA: 1:18 - loss: 0.3439 - accuracy: 0.8677

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 32/50
   2/1095 [..............................] - ETA: 1:54 - loss: 0.3665 - accuracy: 0.8635

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 33/50
   4/1095 [..............................] - ETA: 4:14 - loss: 0.3418 - accuracy: 0.8669

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 34/50
   3/1095 [..............................] - ETA: 2:13 - loss: 0.2813 - accuracy: 0.8698

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 35/50
   1/1095 [..............................] - ETA: 2:17 - loss: 0.3158 - accuracy: 0.8748

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 36/50
   2/1095 [..............................] - ETA: 9:54 - loss: 0.3351 - accuracy: 0.8622

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 37/50
   2/1095 [..............................] - ETA: 3:02 - loss: 0.3488 - accuracy: 0.8618

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 38/50
   1/1095 [..............................] - ETA: 3:40 - loss: 0.3646 - accuracy: 0.8651

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 39/50
  11/1095 [..............................] - ETA: 2:09 - loss: 0.3004 - accuracy: 0.8659

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 40/50
   7/1095 [..............................] - ETA: 2:26 - loss: 0.3162 - accuracy: 0.8666

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 41/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 42/50
  12/1095 [..............................] - ETA: 2:03 - loss: 0.3240 - accuracy: 0.8630

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 43/50
   4/1095 [..............................] - ETA: 3:29 - loss: 0.3631 - accuracy: 0.8596

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 44/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 45/50
   6/1095 [..............................] - ETA: 1:40 - loss: 0.2718 - accuracy: 0.8724

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 46/50
   6/1095 [..............................] - ETA: 1:57 - loss: 0.3464 - accuracy: 0.8604

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 47/50
  24/1095 [..............................] - ETA: 3:23 - loss: 0.3308 - accuracy: 0.8651

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 48/50
  13/1095 [..............................] - ETA: 3:35 - loss: 0.3244 - accuracy: 0.8649

  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 49/50


  X = np.asarray(X)
  Y = np.asarray(Y)


Epoch 50/50
   3/1095 [..............................] - ETA: 1:35 - loss: 0.3213 - accuracy: 0.8722

  X = np.asarray(X)
  Y = np.asarray(Y)


Final Training Accuracy: 0.8611003756523132
Final Validation Accuracy: 0.8737233877182007
12041.82 seconds


In [91]:
joblib.dump(model, 'rnn_embeddings.joblib')
filename = 'rnn_embeddings.sav'
pickle.dump(model, open(filename, 'wb'))