# How to create classifier model using custom modules

In [1]:
import os
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text
from ModellingUtils import ModellingUtils

# Data Preparation
We will use the **IMDb (aclImdb) dataset** for this implementation. The training, validation, and test datasets are loaded from the dataset directory with the **text_dataset_from_directory** method of the **ModellingUtils** module.

In [3]:
# Root of the extracted aclImdb dataset (the folder that contains `train` and `test`).
# Set the ACLIMDB_DIR environment variable to point at your own copy; the original
# local path is kept as the fallback so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    'ACLIMDB_DIR',
    r'C:\Users\Lloyd Acha\Documents\ACHA_Files\Projects\Programming\DataSets\aclImdb_v1\aclImdb',
)

In [4]:
# Training directory: one call returns three values, unpacked here as the
# training dataset, the class names, and the validation dataset.
train_dir = os.path.join(DATA_PATH, 'train')
train_ds, class_names, val_ds = ModellingUtils.text_dataset_from_directory(train_dir)

# Test directory: no train/validation split is requested, and only the first
# element of the returned sequence (presumably the dataset itself) is kept.
test_dir = os.path.join(DATA_PATH, 'test')
test_ds = ModellingUtils.text_dataset_from_directory(
    test_dir,
    subset=None,
    validation_split=0,
    seed=0,
    validation=False,
)[0]

Found 25000 files belonging to 2 classes.
Using 20000 files for training.
Found 25000 files belonging to 2 classes.
Using 5000 files for validation.
Found 25000 files belonging to 2 classes.


We will also use pretrained models from **TensorFlow Hub**.

In [5]:
# TensorFlow Hub handle of the text preprocessing model
# (BERT, English uncased, version 3 according to the URL).
preprocess_url = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
# TensorFlow Hub handle of the small BERT encoder. The L-4 / H-512 / A-8 suffix
# presumably denotes 4 layers, hidden size 512 and 8 attention heads -
# TODO confirm on the model page.
model_url = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1'

The **ModellingUtils** module also provides a **build_classifier_model** method that automatically builds a classifier model from the given pretrained models.

In [6]:
# Build the classifier from the two TF Hub handles defined earlier in the notebook.
# NOTE(review): the architecture is assembled inside ModellingUtils, not in this notebook.
model = ModellingUtils.build_classifier_model(preprocess_url, model_url)
# Print the layer-by-layer summary to inspect the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 preprocessing (KerasLayer)     {'input_mask': (Non  0           ['text[0][0]']                   
                                e, 128),                                                          
                                 'input_word_ids':                                                
                                (None, 128),                                                      
                                 'input_type_ids':                                                
                                (None, 128)}                                                  

# Train 

In [8]:
# Run training inside a '/gpu:0' device scope: 5 epochs on train_ds with
# val_ds passed as the validation data. The name `model` is rebound to
# whatever train_model returns.
with tf.device('/gpu:0'):
    model = ModellingUtils.train_model(
        model,
        data=train_ds,
        epochs=5,
        validation_data=val_ds,
    )