In [None]:
# Install Model maker
!pip install -q tflite-model-maker

In [None]:
# Imports and check that we are using TF2.x
import numpy as np
import os

from tflite_model_maker import configs
from tflite_model_maker import ExportFormat
from tflite_model_maker import model_spec
from tflite_model_maker import text_classifier
from tflite_model_maker import TextClassifierDataLoader

import tensorflow as tf
# This notebook expects TensorFlow 2.x; stop early with a readable message
# (including the version actually found) instead of a bare AssertionError.
assert tf.__version__.startswith('2'), (
    'This notebook requires TensorFlow 2.x, found ' + tf.__version__)
# Raise TensorFlow's logger threshold to ERROR to keep cell output short.
tf.get_logger().setLevel('ERROR')

In [None]:
# Download cleaned_youtube.csv through Keras' get_file helper and keep the
# value it returns; extract=False asks for the file to be left as downloaded.
training_data = tf.keras.utils.get_file(
    fname='cleaned_youtube.csv',
    origin='https://storage.googleapis.com/laurencemoroney-blog.appspot.com/cleaned_youtube.csv',
    extract=False,
)

In [None]:
# Rebuild the CSV path from the directory of the downloaded file plus the
# known file name (presumably the same location already held — confirm).
csv_dir = os.path.dirname(training_data)
training_data = os.path.join(csv_dir, 'cleaned_youtube.csv')

In [None]:
# Pick a model spec from Model Maker. The available options are
# 'mobilebert_classifier', 'bert_classifier' and 'average_word_vec'.
# The first two use the BERT model, which is accurate but larger and slower to train.
# Average Word Vec is similar in spirit to transfer learning: it comes with
# pre-trained word weights and dictionaries.
spec = model_spec.get('average_word_vec')
# Override the spec's hyperparameters before it is used for loading and training
# (presumably vocabulary size, tokens per example and embedding width —
# confirm against the Model Maker documentation).
spec.num_words = 2000
spec.seq_len = 20
spec.wordvec_dim = 7
# Alternative: switch to the MobileBERT-based spec instead.
#spec = model_spec.get('mobilebert_classifier')

In [None]:
# Load the CSV with TextClassifierDataLoader.from_csv to build train_data.
# The spec is passed in alongside the file so the loader and the model share it.
train_data = TextClassifierDataLoader.from_csv(
    # training_data already holds the CSV path; the former nested
    # single-argument os.path.join calls returned it unchanged.
    filename=training_data,
    text_column='commenttext',  # For Toxicity use " value_of_text" (note the leading space)
    label_column='spam',  # For Toxicity also use "label"
    model_spec=spec,
    delimiter=',',
    is_training=True)

In [None]:
# Create the text classifier from train_data with the chosen spec,
# requesting 100 epochs.
model = text_classifier.create(
    train_data,
    model_spec=spec,
    epochs=100,
)

In [None]:
# Export the trained model using the default export format.
model.export(
    export_dir='/mm_spam',
)
# The labels and the vocab (needed, for example, on iOS) can be exported
# separately by naming those formats explicitly.
model.export(
    export_dir='/mm_spam/',
    export_format=[ExportFormat.LABEL, ExportFormat.VOCAB],
)

In [None]:
# Optionally shrink the model with dynamic range quantization before exporting.
# tf.lite.Optimize.DEFAULT replaces OPTIMIZE_FOR_LATENCY, which TensorFlow
# documents as deprecated and equivalent to DEFAULT.
config = configs.QuantizationConfig.create_dynamic_range_quantization(
    optimizations=[tf.lite.Optimize.DEFAULT])
config.experimental_new_quantizer = True
# NOTE(review): this exports into the same directory as the earlier
# unquantized export, so it presumably replaces that model file — confirm
# before relying on both being present.
model.export(export_dir='/mm_spam/', quantization_config=config)

In [None]:
# Show the trained model's summary.
model.summary()

In [None]:
# Export for TensorFlow.js, together with the labels and the vocab,
# into a separate directory.
model.export(
    export_dir="/mm_js/",
    export_format=[
        ExportFormat.TFJS,
        ExportFormat.LABEL,
        ExportFormat.VOCAB,
    ],
)