## Imports

In [28]:
import os
from pathlib import Path

import pandas as pd
from sklearn.metrics import accuracy_score
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam

%run utils.ipynb import *
%run models.ipynb import *

## Load Processed Data

In [29]:
# Root of the project checkout. The default is the location used when the notebook
# was written; set the NAME_VERIFICATION_ROOT environment variable to run it from
# any other machine or directory without editing the notebook.
PROJECT_ROOT = Path(os.environ.get('NAME_VERIFICATION_ROOT',
                                   'C:/Users/mahmo/Desktop/Name-Verification-Model'))
# Directory holding the processed train/test CSV files.
DATA_DIR = PROJECT_ROOT / 'input' / 'processed_data'

# Load the processed splits; both frames are expected to contain a 'Label' column,
# which the next cell separates from the features.
train_data = pd.read_csv(DATA_DIR / 'train_data.csv')
test_data = pd.read_csv(DATA_DIR / 'test_data.csv')

In [30]:
# Separate each split into its label column (kept as a one-column DataFrame)
# and its features (every column except 'Label').
y_train = train_data[['Label']]
X_train = train_data.drop(columns=['Label'])

y_test = test_data[['Label']]
X_test = test_data.drop(columns=['Label'])

## Core Model

We will use the **Tokenizer** of **AraBERT** to convert our full names into tokens, and then into the IDs that represent each word, so that the data is ready for my own Deep Learning Model.

In [31]:
# Load the tokenizer.
# `model_name` is the AraBERT checkpoint identifier handed to DataTransformer.
# NOTE(review): DataTransformer is defined outside this notebook (presumably in
# utils.ipynb, run in the imports cell); it presumably downloads/loads the
# tokenizer for this checkpoint — confirm against its definition.
model_name = 'aubmindlab/bert-base-arabertv02'
transformer = DataTransformer(model_name=model_name)

This method applies all the transformations and preprocessing to the names data: it converts the names into **Tokens**, converts the tokens into **IDs**, and **Encodes** the labels by converting them to **One Hot Encoding**. It returns the data ready to be fed into my own Deep Learning Model.

In [32]:
# Transform the train and test splits into model-ready inputs and labels.
#
# The inputs are rebuilt from the loaded frames (train_data / test_data) rather
# than read from X_train / y_train / X_test / y_test, because this cell rebinds
# those four names to the transformed data. Reading them back in would make a
# second run of the cell feed already-transformed data into the transformer.
X_train, y_train = transformer.transform_data(
    train_data.drop(columns=['Label']),
    train_data[['Label']],
)
X_test, y_test = transformer.transform_data(
    test_data.drop(columns=['Label']),
    test_data[['Label']],
)

Define my own **Model Architecture**

In [33]:
# Hyperparameters

# --- architecture (values match the shapes printed by model.summary()) ---
# Number of rows in the embedding table.
# NOTE(review): the input IDs come from the AraBERT tokenizer; confirm every ID
# is < VOCAB_SIZE (or that DataTransformer remaps them), otherwise the embedding
# lookup would be out of range.
VOCAB_SIZE = 10000
MAX_LEN = 50          # sequence length of each model input
EMBEDDING_DIM = 16    # width of each embedding vector
LSTM_SIZE = 16        # units per LSTM direction (bidirectional output is 2x this)
HIDDEN_SIZE = 32      # units in the hidden dense layer
NUM_CLASSES = 2       # units in the output layer

# --- training ---
BATCH_SIZE = 512
EPOCHS = 35
# Fraction of the training data held out for validation by model.fit.
VAL_SPLIT = 0.15
# Backward-compatible alias: the constant was originally misspelled and other
# cells still refer to it by this name.
VAL_SLPIT = VAL_SPLIT

# --- compile settings passed to CustomModel ---
OPTIMIZER = Adam()
LOSS = BinaryCrossentropy()
METRICS = ['accuracy']

In [34]:
# Wrap the compile settings (optimizer, loss, metrics) in the project's model factory.
custom_model = CustomModel(OPTIMIZER=OPTIMIZER, LOSS=LOSS, METRICS=METRICS)

# Architecture hyperparameters, collected once and forwarded as keyword arguments.
architecture = dict(
    VOCAB_SIZE=VOCAB_SIZE,
    EMBEDDING_DIM=EMBEDDING_DIM,
    MAX_LEN=MAX_LEN,
    LSTM_SIZE=LSTM_SIZE,
    HIDDEN_SIZE=HIDDEN_SIZE,
    NUM_CLASSES=NUM_CLASSES,
)
model = custom_model.create_model(**architecture)

# Print the layer-by-layer summary of the built model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 50, 16)            160000    
                                                                 
 bidirectional (Bidirectiona  (None, 50, 32)           4224      
 l)                                                              
                                                                 
 global_max_pooling1d (Globa  (None, 32)               0         
 lMaxPooling1D)                                                  
                                                                 
 dense (Dense)               (None, 32)                1056      
                                                                 
 dropout (Dropout)           (None, 32)                0         
                                                                 
 dense_1 (Dense)             (None, 2)                 6

In [35]:
# Fit the model on the transformed training data; `history` keeps the object
# returned by fit so later cells can plot from it.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_split=VAL_SLPIT,
)

Epoch 1/35

KeyboardInterrupt: 

In [11]:
# Create the evaluation helper; it is reused by the prediction cell below.
# NOTE(review): ModelEvaluation is defined outside this notebook (presumably in
# utils.ipynb, run in the imports cell) — confirm.
evaluation_tools = ModelEvaluation()
# Plot curves from the object returned by model.fit.
# NOTE(review): the saved output of this cell is "NameError: name 'plot_curves'
# is not defined"; that presumably originates inside the helper (e.g. a call
# missing `self.`) or from a stale kernel — verify before relying on this cell.
evaluation_tools.plot_curves(history)

NameError: name 'plot_curves' is not defined

In [12]:
# Ask the evaluation helper for the model's predictions on the test inputs.
raw_predictions = evaluation_tools.predictions(model, X_test)

# Wrap the predictions in a two-column DataFrame, one column per class.
y_pred = pd.DataFrame(raw_predictions, columns=['Correct', 'Incorrect'])

# NOTE(review): accuracy_score needs y_test and y_pred in the same discrete
# format; this assumes `predictions` returns hard 0/1 labels rather than
# probabilities — confirm against the helper's definition.
accuracy = accuracy_score(y_test, y_pred)
print('The accuracy of the model on Test Data is:', accuracy)

NameError: name 'predictions' is not defined

In [13]:
# Directory for saved weights, under the project root. The default root is the
# location used when the notebook was written; set the NAME_VERIFICATION_ROOT
# environment variable to save somewhere else without editing the notebook.
MODELS_DIR = Path(os.environ.get('NAME_VERIFICATION_ROOT',
                                 'C:/Users/mahmo/Desktop/Name-Verification-Model')) / 'models'
# Create the directory up front so a missing folder cannot discard a finished training run.
MODELS_DIR.mkdir(parents=True, exist_ok=True)

# as_posix() keeps forward slashes, so the default resolves to exactly the
# original hard-coded string.
saved_model_name = (MODELS_DIR / 'name_verification_model.h5').as_posix()
# Persist only the weights (not the architecture) in HDF5 format.
model.save_weights(saved_model_name, save_format='h5')

NameError: name 'model' is not defined