In [2]:
# from tensorflow.keras import mixed_precision
# # On TPUs, use 'mixed_bfloat16' instead
# mixed_precision.set_global_policy('mixed_bfloat16')

In [3]:
from kaggle_datasets import KaggleDatasets
# GCS location of the dataset attached to this Kaggle notebook; used below to
# glob the TFRecord files.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()

In [4]:
from transformers import BertTokenizer, TFBertModel
import tensorflow as tf
# Connect to the TPU cluster and build the distribution strategy; the model is
# later created inside `strategy.scope()`.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect()
print("Device:", tpu.master())
strategy = tf.distribute.TPUStrategy(tpu)
# Number of replicas the strategy keeps in sync.
print("Number of replicas:", strategy.num_replicas_in_sync)

2022-01-27 09:02:05.066784: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.0.0.2:8470}
2022-01-27 09:02:05.066856: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:30181}
2022-01-27 09:02:05.069355: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job worker -> {0 -> 10.0.0.2:8470}
2022-01-27 09:02:05.069408: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:301] Initialize GrpcChannelCache for job localhost -> {0 -> localhost:30181}


Device: grpc://10.0.0.2:8470
Number of replicas: 8


In [5]:
from kaggle_secrets import UserSecretsClient
# Fetch the notebook user's gcloud credential through Kaggle secrets and
# register it with TensorFlow.
# NOTE(review): presumably required so TF can read the GCS dataset path -- confirm.
user_secrets = UserSecretsClient()
user_credential = user_secrets.get_gcloud_credential()
user_secrets.set_tensorflow_credential(user_credential)

In [6]:
def create_model(maxlen=512):
    """Build and compile the image + text multi-output classifier.

    Image branch: EfficientNetB5 without its top (all layers except the last
    25 frozen), global average pooling, then a 2048-unit ReLU projection.
    Text branch: ``bert-base-uncased`` over the token ids, taking the hidden
    state at sequence position 0, then a 2048-unit ReLU projection.
    Both branches are concatenated and fed through a shared 512-unit layer with
    dropout, from which five sigmoid heads are derived.

    Args:
        maxlen: Length of the token-id sequence accepted by the text input.

    Returns:
        A compiled ``tf.keras.Model`` whose inputs are named ``input_1``
        (299x299x3 image) and ``input_text`` (token ids), and whose outputs are
        named ``misogynous``, ``a``, ``b``, ``c`` and ``d``.
    """
    text_model = TFBertModel.from_pretrained("bert-base-uncased")

    # Name the image input explicitly instead of relying on Keras' automatic
    # "input_<n>" counter: the dataset feeds inputs by the key 'input_1', and
    # the automatic name changes if this function is run more than once in the
    # same kernel.
    image_input = tf.keras.Input(shape=(299, 299, 3), name='input_1')
    image_model = tf.keras.applications.efficientnet.EfficientNetB5(
        include_top=False,
        input_tensor=image_input,
        input_shape=(299, 299, 3),
    )
    # Fine-tune only the last 25 backbone layers.
    for layer in image_model.layers[:-25]:
        layer.trainable = False
    x = tf.keras.layers.GlobalAveragePooling2D()(image_model.output)
    x = tf.keras.layers.Dense(2048, activation='relu')(x)  # Best model has this

    input_ids = tf.keras.Input(shape=(maxlen,), dtype='int32', name='input_text')
    # NOTE(review): no attention_mask is passed to BERT; if the token ids are
    # padded, confirm that attending to the padding positions is intended.
    sequence_output = text_model(input_ids)[0]
    cls_token = sequence_output[:, 0, :]
    cls_token = tf.keras.layers.Dense(2048, activation='relu')(cls_token)  # Best model has this

    concat_layer = tf.keras.layers.Concatenate()([x, cls_token])

    # Shared trunk for all heads.
    output = tf.keras.layers.Dense(512, activation='relu')(concat_layer)
    output = tf.keras.layers.Dropout(0.2)(output)  # Best model has this
    binary_output = tf.keras.layers.Dense(1, activation='sigmoid', name='misogynous')(output)

    # The four auxiliary heads share one extra hidden layer; head 'b' has an
    # additional 128-unit layer of its own.
    multi_label_output = tf.keras.layers.Dense(512, activation='relu')(output)  # Best model has this
    a = tf.keras.layers.Dense(1, activation='sigmoid', name='a')(multi_label_output)  # Best model has this
    b = tf.keras.layers.Dense(128, activation='relu')(multi_label_output)  # Best model has this
    b = tf.keras.layers.Dense(1, activation='sigmoid', name='b')(b)  # Best model has this
    c = tf.keras.layers.Dense(1, activation='sigmoid', name='c')(multi_label_output)  # Best model has this
    d = tf.keras.layers.Dense(1, activation='sigmoid', name='d')(multi_label_output)  # Best model has this

    model = tf.keras.models.Model(inputs=[image_input, input_ids], outputs=[binary_output, a, b, c, d])
    # Every head is an independent binary classifier.
    head_names = ('misogynous', 'a', 'b', 'c', 'd')
    model.compile(
        tf.keras.optimizers.Adam(learning_rate=6e-5),
        loss={head: 'binary_crossentropy' for head in head_names},
        metrics=['accuracy'],
    )
    return model

In [7]:
# Build the model inside the strategy scope so it is created under the
# TPU distribution strategy defined above.
with strategy.scope():
    model = create_model()

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [8]:
import collections

def parse_records(example_proto):
    """Parse one serialized example into a dict of tensors.

    Returns a dict with the keys ``image`` (JPEG decoded to 3 channels),
    ``text``, ``label`` and ``multi_label``; the two variable-length features
    are reduced to their ``.values``.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'text': tf.io.VarLenFeature(tf.int64),
        'label': tf.io.FixedLenFeature([], tf.int64),
        'multi_label': tf.io.VarLenFeature(tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, schema)
    return {
        'image': tf.io.decode_jpeg(parsed['image'], channels=3),
        'text': parsed['text'].values,
        'label': parsed['label'],
        'multi_label': parsed['multi_label'].values,
    }

def prepare_sample(features):
    """Convert a parsed record into the ``(inputs, targets)`` pair for Keras.

    The image is resized to 299x299; the first four entries of ``multi_label``
    become the targets of heads ``a``..``d``. Dict keys match the model's
    input and output layer names.
    """
    multi_label = features['multi_label']
    inputs = collections.OrderedDict([
        ('input_1', tf.image.resize(features["image"], size=(299, 299))),
        ('input_text', features["text"]),
    ])
    targets = collections.OrderedDict([
        ('misogynous', features['label']),
        ('a', multi_label[0]),
        ('b', multi_label[1]),
        ('c', multi_label[2]),
        ('d', multi_label[3]),
    ])
    return inputs, targets
#     return collections.OrderedDict(input_1=image, input_text=text), collections.OrderedDict(multi_label=multi_label)

def get_dataset(filenames, batch_size, mode='train'):
    """Build the batched ``tf.data`` pipeline over a list of TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        batch_size: Number of samples per batch.
        mode: ``'train'`` shuffles the samples and repeats the dataset
            indefinitely; any other value yields a single, unshuffled pass.

    Returns:
        A ``tf.data.Dataset`` of ``(inputs, targets)`` batches.
    """
    # Use the TF constant directly so this function does not depend on a
    # notebook global that is only defined in a later cell.
    autotune = tf.data.AUTOTUNE
    is_training = mode == 'train'

    # Record order does not matter, so allow non-deterministic parallel reads.
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False

    dataset = (
        tf.data.TFRecordDataset(filenames, num_parallel_reads=autotune)
        .with_options(ignore_order)
        .map(parse_records, num_parallel_calls=autotune)
        .map(prepare_sample, num_parallel_calls=autotune)
    )
    if is_training:
        # Shuffling only helps training; evaluation results do not depend on order.
        dataset = dataset.shuffle(batch_size * 10)
    dataset = dataset.batch(batch_size)
    if is_training:
        dataset = dataset.repeat()
    # Prefetch last so it overlaps the whole upstream pipeline with training.
    return dataset.prefetch(autotune)

In [9]:
from glob import glob
# Gather every TFRecord shard from both folders, then hold out the first shard
# for validation and train on the rest.
train_pattern = f'{GCS_DS_PATH}/train/content/MAMI_BERT/train/*.tfrecords'
eval_pattern = f'{GCS_DS_PATH}/eval/*.tfrecords'
all_records = tf.io.gfile.glob(train_pattern) + tf.io.gfile.glob(eval_pattern)
eval_data = [all_records[0]]
train_data = all_records[1:]

batch_size = 256
AUTOTUNE = tf.data.AUTOTUNE

In [10]:
# Training pipeline (default 'train' mode) and evaluation pipeline.
train_dataset = get_dataset(filenames=train_data, batch_size=batch_size, mode='train')
eval_dataset = get_dataset(filenames=eval_data, batch_size=batch_size, mode='eval')

In [12]:
# Stop once validation accuracy of the 'misogynous' head has not improved for
# 10 epochs, and roll back to the best weights seen.
# (Alternative monitor tried previously: 'val_multi_label_loss'.)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor='val_misogynous_accuracy',
    patience=10,
    restore_best_weights=True,
)
class PolynomialDecay():
    """Polynomial learning-rate schedule, callable as ``schedule(epoch)``.

    The rate is ``initAlpha * (1 - epoch / maxEpochs) ** power`` and stays at
    0.0 once ``epoch`` reaches or exceeds ``maxEpochs``.

    Args:
        maxEpochs: Epoch at which the rate reaches zero; must be positive.
        initAlpha: Learning rate at epoch 0.
        power: Exponent of the decay (1.0 gives a linear decay).

    Raises:
        ValueError: If ``maxEpochs`` is not positive.
    """
    def __init__(self, maxEpochs=100, initAlpha=0.01, power=1.0):
        if maxEpochs <= 0:
            raise ValueError("maxEpochs must be positive, got %r" % (maxEpochs,))
        self.maxEpochs = maxEpochs
        self.initAlpha = initAlpha
        self.power = power

    def __call__(self, epoch):
        """Return the learning rate for ``epoch`` as a Python float."""
        # Clamp progress at 1.0: past maxEpochs the unclamped base would be
        # negative, producing a negative rate (or a complex number for a
        # fractional power, which float() rejects).
        progress = min(epoch / float(self.maxEpochs), 1.0)
        decay = (1 - progress) ** self.power
        alpha = self.initAlpha * decay
        return float(alpha)
# Per-epoch learning-rate schedule starting at 6e-5 (the schedule's other
# parameters keep their defaults).
poly_schedule = PolynomialDecay(initAlpha=6e-5)
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(poly_schedule)

In [13]:
# The training dataset repeats indefinitely, so steps_per_epoch defines how
# many batches make up one epoch.
model.fit(
    train_dataset,
    validation_data=eval_dataset,
    epochs=50,
    steps_per_epoch=10000//batch_size,
    callbacks=[early_stopping_cb, lr_scheduler],
)

Epoch 1/50


2022-01-27 09:06:48.579035: W ./tensorflow/core/distributed_runtime/eager/destroy_tensor_handle_node.h:57] Ignoring an error encountered when deleting remote tensors handles: Invalid argument: Unable to find the relevant tensor remote_handle: Op ID: 70464, Output num: 8
Additional GRPC error information from remote target /job:worker/replica:0/task:0:
:{"created":"@1643274408.575545860","description":"Error received from peer ipv4:10.0.0.2:8470","file":"external/com_github_grpc_grpc/src/core/lib/surface/call.cc","file_line":1056,"grpc_message":"Unable to find the relevant tensor remote_handle: Op ID: 70464, Output num: 8","grpc_status":3}


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7fc8ce8e9ad0>

In [14]:
model_dir = "./MAMI_Bert_5_output"
!rm -r {model_dir}
!mkdir {model_dir}

localhost_save_option = tf.saved_model.SaveOptions(experimental_io_device="/job:localhost")
model.save(model_dir, options=localhost_save_option)

rm: cannot remove './MAMI_Bert_5_output': No such file or directory


2022-01-27 09:47:22.107708: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.


In [15]:
import inspect

# Store the architecture definition next to the saved weights. The text is
# taken from the live `create_model` function rather than a hand-copied string,
# so the two cannot drift apart.
with open(f'{model_dir}/model.txt', 'w') as file:
    file.write(inspect.getsource(create_model).rstrip())

In [16]:
# Bundle the saved model directory (including model.txt) into a single archive.
!zip -r "./MAMI_Bert_5_output_eff_8460.zip" "./MAMI_Bert_5_output"

  adding: MAMI_Bert_5_output/ (stored 0%)
  adding: MAMI_Bert_5_output/saved_model.pb (deflated 92%)
  adding: MAMI_Bert_5_output/variables/ (stored 0%)
  adding: MAMI_Bert_5_output/variables/variables.index (deflated 80%)
  adding: MAMI_Bert_5_output/variables/variables.data-00000-of-00001 (deflated 14%)
  adding: MAMI_Bert_5_output/assets/ (stored 0%)
  adding: MAMI_Bert_5_output/model.txt (deflated 68%)


In [17]:
!rm -r "./MAMI_Bert_5_output"