In [7]:
import tensorflow as tf
import tensorflow_hub as hub
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the BERT model
module_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3"
bert_layer = hub.KerasLayer(module_url, trainable=True)

# Load the data into a Pandas DataFrame
data = pd.read_csv("./train_snli.txt.zip", sep="\t", header=None, names=["text1", "text2", "label"])
data['text1'] = data['text1'].astype(str)
data['text2'] = data['text2'].astype(str)
data.head()

2023-03-21 07:11:06.109792: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 93763584 exceeds 10% of free system memory.


Unnamed: 0,text1,text2,label
0,A person on a horse jumps over a broken down a...,"A person is at a diner, ordering an omelette.",0
1,A person on a horse jumps over a broken down a...,"A person is outdoors, on a horse.",1
2,Children smiling and waving at camera,There are children present,1
3,Children smiling and waving at camera,The kids are frowning,0
4,A boy is jumping on skateboard in the middle o...,The boy skates down the sidewalk.,0


In [8]:

# Split the data into training and validation sets
train, val = train_test_split(data, test_size=0.2)

# Define the tokenizer
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(train["text1"].values + train["text2"].values)

# Define the input layers
max_seq_length = 128
input_word_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name="input_word_ids")
input_mask = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name="input_mask")
segment_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype=tf.int32, name="segment_ids")

# Connect the BERT layer
pooled_output, sequence_output = bert_layer([input_word_ids, input_mask, segment_ids])

# Add additional layers on top of BERT
x = tf.keras.layers.Dense(64, activation="relu")(pooled_output)
output = tf.keras.layers.Dense(1, activation="sigmoid")(x)

# Define the model input and output
model = tf.keras.models.Model(
    inputs=[input_word_ids, input_mask, segment_ids], outputs=output
)

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Convert the text data to input features for BERT
def convert_example_to_feature(text1, text2, max_seq_length):
    tokens_a = tokenizer.tokenize(text1)
    tokens_b = tokenizer.tokenize(text2)

    # Combine the two texts and add [CLS] and [SEP] tokens
    tokens = ["[CLS]"] + tokens_a + ["[SEP]"] + tokens_b + ["[SEP]"]

    # Truncate or pad the sequence to the maximum sequence length
    if len(tokens) > max_seq_length:
        tokens = tokens[:max_seq_length]
    else:
        tokens += ["[PAD]"] * (max_seq_length - len(tokens))

    # Convert tokens to input IDs, input masks, and segment IDs
    input_ids = tokenizer.convert_tokens_to_ids(tokens)
    input_masks = [1] * len(input_ids)
    segment_ids = [0] * (len(tokens_a) + 2) + [1] * (len(tokens_b) + 1)

    return input_ids, input_masks, segment_ids

# Define a function to create the input features for BERT
def create_input_features(df, max_seq_length):
    input_ids, input_masks, segment_ids = [], [], []
    for _, row in df.iterrows():
        text1, text2 = row["text1"], row["text2"]
        input_id, input_mask, segment_id = convert_example_to_feature(text1, text2, max_seq_length)
        input_ids.append(input_id)
        input_masks.append(input_mask)
        segment_ids.append(segment_id)
    return np.array(input_ids), np.array(input_masks), np.array(segment_ids)

# Train the model
batch_size = 32
epochs = 10
train_input = create_input_features
(train, max_seq_length)
val_input = create_input_features(val, max_seq_length)

history = model.fit(
train_input,
train["label"].values,
validation_data=(val_input, val["label"].values),
batch_size=batch_size,
epochs=epochs,
verbose=1,
)

ValueError: Exception encountered when calling layer "keras_layer_3" (type KerasLayer).

in user code:

    File "/home/sujay1844/.local/share/virtualenvs/plagiarism-checker-C_ALrIvo/lib64/python3.11/site-packages/tensorflow_hub/keras_layer.py", line 242, in call  *
        result = smart_cond.smart_cond(training,

    ValueError: Could not find matching concrete function to call loaded from the SavedModel. Got:
      Positional arguments (3 total):
        * [<tf.Tensor 'inputs:0' shape=(None, 128) dtype=int32>,
     <tf.Tensor 'inputs_1:0' shape=(None, 128) dtype=int32>,
     <tf.Tensor 'inputs_2:0' shape=(None, 128) dtype=int32>]
        * False
        * None
      Keyword arguments: {}
    
     Expected these arguments to match one of the following 4 option(s):
    
    Option 1:
      Positional arguments (3 total):
        * {'input_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_mask'),
     'input_type_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_type_ids'),
     'input_word_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_word_ids')}
        * False
        * None
      Keyword arguments: {}
    
    Option 2:
      Positional arguments (3 total):
        * {'input_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_mask'),
     'input_type_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_type_ids'),
     'input_word_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_word_ids')}
        * True
        * None
      Keyword arguments: {}
    
    Option 3:
      Positional arguments (3 total):
        * {'input_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_mask'),
     'input_type_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_type_ids'),
     'input_word_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='input_word_ids')}
        * False
        * None
      Keyword arguments: {}
    
    Option 4:
      Positional arguments (3 total):
        * {'input_mask': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_mask'),
     'input_type_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_type_ids'),
     'input_word_ids': TensorSpec(shape=(None, None), dtype=tf.int32, name='inputs/input_word_ids')}
        * True
        * None
      Keyword arguments: {}


Call arguments received by layer "keras_layer_3" (type KerasLayer):
  • inputs=['tf.Tensor(shape=(None, 128), dtype=int32)', 'tf.Tensor(shape=(None, 128), dtype=int32)', 'tf.Tensor(shape=(None, 128), dtype=int32)']
  • training=None