# Transfer Learning Using ALBERT

The script below takes the pre-trained ALBERT model from the `transformers` library and fine-tunes it with the Twitter dataset.

In [None]:
# Perform necessary setup
from google.colab import drive
from os.path import join

ROOT = '/content/drive'
MY_GOOGLE_DRIVE_PATH = 'My Drive/springboard/capstone/sentiment' 
PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)

drive.mount(ROOT)

Mounted at /content/drive


In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import AlbertTokenizer, TFAlbertForSequenceClassification
from transformers import InputExample, InputFeatures

# Load ALBERT model
model = TFAlbertForSequenceClassification.from_pretrained('albert-base-v1', num_labels=2)
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v1')
model.summary()

2021-10-26 20:58:16.349947: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
All model checkpoint layers were used when initializing TFAlbertForSequenceClassification.

Some layers of TFAlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v1 and are newly initialized: ['classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model: "tf_albert_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
albert (TFAlbertMainLayer)   multiple                  11683584  
_________________________________________________________________
dropout_4 (Dropout)          multiple                  0         
_________________________________________________________________
classifier (Dense)           multiple                  1538      
Total params: 11,685,122
Trainable params: 11,685,122
Non-trainable params: 0
_________________________________________________________________


In [2]:
# Prepare dataset
dataset = pd.read_csv('../data/clean-data.csv')
dataset['label'] = np.where(dataset['label']==4,1,0)

# Split dataset into train, test, and validation
train, test = train_test_split(dataset, test_size=0.2, random_state=123)
validation, test = train_test_split(test, test_size=0.5, random_state=123)
train.head()

Unnamed: 0,label,tweet
759795,0,new study mate mozart he is been encouraging m...
443931,0,at the bridal shop its kind of depressin where...
18163,0,ohno wang
1118234,1,"i will be in cali june one thousand, seven hun..."
771397,0,watching real world my time is running out


In [3]:
# Create function for turning individual data points into examples
def convert_to_examples(train, test, validation): 
  train_input = train.apply(lambda x: InputExample(guid=None, text_a = x['tweet'], text_b = None, label = x['label']), axis = 1)
  test_input = test.apply(lambda x: InputExample(guid=None, text_a = x['tweet'], text_b = None, label = x['label']), axis = 1)
  validation_input = validation.apply(lambda x: InputExample(guid=None, text_a = x['tweet'], text_b = None, label = x['label']), axis = 1)

  return train_input, test_input, validation_input

# Create function for data ingestion pipeline
def convert_to_tf_dataset(examples, tokenizer, max_length=140):
  features = []
  counter = 0

  for e in examples:
    try:
      input_dict = tokenizer.encode_plus(
          e.text_a,
          add_special_tokens=True,
          max_length=max_length,
          return_token_type_ids=True,
          return_attention_mask=True,
          padding='max_length',
          truncation=True
      )
      input_ids, token_type_ids, attention_mask = (input_dict['input_ids'], input_dict['token_type_ids'], input_dict['attention_mask'])
      features.append(InputFeatures(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids, label=e.label))
    except:
      counter += 1
      pass
  
  print('data tokenization complete with %s data points unprocessed' % (counter))

  def gen():
    for f in features:
        yield ({'input_ids': f.input_ids, 'attention_mask': f.attention_mask, 'token_type_ids': f.token_type_ids}, f.label)

  return tf.data.Dataset.from_generator(gen,
                                        ({'input_ids': tf.int32, 'attention_mask': tf.int32, 'token_type_ids': tf.int32}, tf.int64),
                                        ({'input_ids': tf.TensorShape([None]), 'attention_mask': tf.TensorShape([None]), 'token_type_ids': tf.TensorShape([None])} ,tf.TensorShape([]))
                                        )

In [4]:
# Run above functions
train_input, test_input, validation_input = convert_to_examples(train, test, validation)

train_data = convert_to_tf_dataset(list(train_input), tokenizer)
train_data = train_data.shuffle(100).batch(32).repeat(2)

test_data = convert_to_tf_dataset(list(test_input), tokenizer)
test_data = test_data.batch(32)

validation_data = convert_to_tf_dataset(list(validation_input), tokenizer)
validation_data = validation_data.batch(32)

data tokenization complete with 2487 data points unprocessed
data tokenization complete with 315 data points unprocessed
data tokenization complete with 335 data points unprocessed


In [None]:
# Specify checkpoint path
checkpoint_path = 'model/checkpoint/cp.ckpt'
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

# Train model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5), 
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])
model.fit(train_data, epochs=1, validation_data=validation_data, callbacks=[cp_callback])

2021-10-26 20:23:45.266476: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


In [None]:
model.save('model')

In [5]:
model = tf.keras.models.load_model('../model/trained-model', compile=True)
model.evaluate(test_data)

2021-10-26 21:06:18.142014: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


ValueError: in user code:

    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/engine/training.py:1330 test_function  *
        return step_function(self, iterator)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/engine/training.py:1320 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1286 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2849 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3632 _call_for_each_replica
        return fn(*args, **kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/engine/training.py:1313 run_step  **
        outputs = model.test_step(data)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/engine/training.py:1267 test_step
        y_pred = self(x, training=False)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/engine/base_layer.py:1037 __call__
        outputs = call_fn(inputs, *args, **kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/saving/saved_model/utils.py:68 return_outputs_and_add_losses
        outputs, losses = fn(*args, **kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/saving/saved_model/utils.py:166 wrap_with_training_arg
        lambda: replace_training_and_call(False))
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/utils/control_flow_util.py:106 smart_cond
        pred, true_fn=true_fn, false_fn=false_fn, name=name)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/framework/smart_cond.py:58 smart_cond
        return false_fn()
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/saving/saved_model/utils.py:166 <lambda>
        lambda: replace_training_and_call(False))
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/keras/saving/saved_model/utils.py:162 replace_training_and_call
        return wrapped_call(*args, **kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:885 __call__
        result = self._call(*args, **kwds)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:933 _call
        self._initialize(args, kwds, add_initializers_to=initializers)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:760 _initialize
        *args, **kwds))
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3066 _get_concrete_function_internal_garbage_collected
        graph_function, _ = self._maybe_define_function(args, kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3463 _maybe_define_function
        graph_function = self._create_graph_function(args, kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/function.py:3308 _create_graph_function
        capture_by_value=self._capture_by_value),
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/framework/func_graph.py:1007 func_graph_from_py_func
        func_outputs = python_func(*func_args, **func_kwargs)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/eager/def_function.py:668 wrapped_fn
        out = weak_wrapped_fn().__wrapped__(*args, **kwds)
    /Users/spencermoon/Documents/sentiment/env/lib/python3.7/site-packages/tensorflow/python/saved_model/function_deserialization.py:294 restored_function_body
        "\n\n".join(signature_descriptions)))

    ValueError: Could not find matching function to call loaded from the SavedModel. Got:
      Positional arguments (11 total):
        * {'input_ids': <tf.Tensor 'input_ids_1:0' shape=(None, None) dtype=int32>, 'attention_mask': <tf.Tensor 'input_ids:0' shape=(None, None) dtype=int32>, 'token_type_ids': <tf.Tensor 'input_ids_2:0' shape=(None, None) dtype=int32>}
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * False
      Keyword arguments: {}
    
    Expected these arguments to match one of the following 2 option(s):
    
    Option 1:
      Positional arguments (11 total):
        * {'input_ids': TensorSpec(shape=(None, 5), dtype=tf.int32, name='input_ids/input_ids')}
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * False
      Keyword arguments: {}
    
    Option 2:
      Positional arguments (11 total):
        * {'input_ids': TensorSpec(shape=(None, 5), dtype=tf.int32, name='input_ids/input_ids')}
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * None
        * True
      Keyword arguments: {}
