In [1]:
import pandas as pd
import tensorflow as tf
from transformers import TFBertModel, BertTokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
from sklearn.model_selection import train_test_split

2023-04-07 07:13:30.872306: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Number of leading rows used for this small-scale experiment.
N_ROWS = 1000

# The file is tab-separated with no header row, so column names are supplied
# explicitly. `nrows` stops parsing after N_ROWS lines instead of loading the
# entire file and slicing it afterwards.
df = pd.read_csv(
    './train_snli.txt.zip',
    sep='\t',
    header=None,
    names=['sentence1', 'sentence2', 'label'],
    nrows=N_ROWS,
)
df.head()

Unnamed: 0,sentence1,sentence2,label
0,A person on a horse jumps over a broken down a...,"A person is at a diner, ordering an omelette.",0
1,A person on a horse jumps over a broken down a...,"A person is outdoors, on a horse.",1
2,Children smiling and waving at camera,There are children present,1
3,Children smiling and waving at camera,The kids are frowning,0
4,A boy is jumping on skateboard in the middle o...,The boy skates down the sidewalk.,0


In [3]:
# Load the pre-trained BERT model and its matching tokenizer from one checkpoint id
BERT_CHECKPOINT = 'google/bert_uncased_L-2_H-128_A-2'

# from_pt=True converts the checkpoint's PyTorch weights into the TF model.
model = TFBertModel.from_pretrained(BERT_CHECKPOINT, from_pt=True)
# The tokenizer only loads vocabulary/config files; `from_pt` is a weight-loading
# flag that does not apply to it, so it is not passed here.
tokenizer = BertTokenizer.from_pretrained(BERT_CHECKPOINT)

2023-04-07 07:13:34.017099: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-07 07:13:34.019315: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected

In [4]:
max_length = 100      # number of token positions kept per sentence
embedding_size = 128  # width of each token embedding fed to the CNN


def vectorize_and_preprocess(sentence):
    """Embed one sentence with BERT and shape it as a fixed-size CNN input.

    The sentence is tokenized (special tokens included) and passed through the
    module-level ``model``; the first output for the single batch item is then
    padded or truncated at the end to ``max_length`` rows.

    NOTE: this reads the global ``model``. A later cell rebinds ``model`` to the
    Keras classifier, so this function must be applied before that cell runs.

    Args:
        sentence: Text to embed.

    Returns:
        A float32 array of shape ``(max_length, embedding_size, 1)``.
    """
    input_ids = tokenizer.encode(sentence, add_special_tokens=True)
    # The model expects a batch dimension, hence the wrapping list.
    input_ids = tf.convert_to_tensor([input_ids])

    # First model output, first (only) batch item -> one row per token.
    embeddings = model(input_ids)[0][0].numpy()

    # pad_sequences defaults to dtype='int32', which would truncate the float
    # embeddings to integers; request float32 explicitly to keep their values.
    padded_embeddings = pad_sequences(
        [embeddings],
        maxlen=max_length,
        dtype='float32',
        padding='post',
        truncating='post'
    )
    # Drop the batch axis and add a trailing channel axis for Conv2D input
    cnn_input = padded_embeddings.reshape((max_length, embedding_size, 1))
    return cnn_input

In [5]:
# Embed every first sentence; each entry becomes one CNN-ready array.
sentence1_vectors = df['sentence1'].apply(vectorize_and_preprocess)
df['vectors1'] = sentence1_vectors

In [6]:
# Embed every second sentence; each entry becomes one CNN-ready array.
sentence2_vectors = df['sentence2'].apply(vectorize_and_preprocess)
df['vectors2'] = sentence2_vectors

In [7]:
# Collapse each column of per-row arrays into a single 4D batch array:
# (rows, max_length, embedding_size, 1).
batch_shape = (len(df), max_length, embedding_size, 1)
vectors1 = np.array(df['vectors1'].tolist()).reshape(batch_shape)
vectors2 = np.array(df['vectors2'].tolist()).reshape(batch_shape)

In [8]:
# Every sample is a (max_length, embedding_size) grid with a single channel
input_shape = (max_length, embedding_size, 1)

# One input per sentence of the pair
input1 = tf.keras.layers.Input(shape=input_shape, name='vectors1')
input2 = tf.keras.layers.Input(shape=input_shape, name='vectors2')

# Each layer is instantiated exactly once and reused by both branches,
# so the two sentences are processed with the same weights.
conv1 = tf.keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu')
conv2 = tf.keras.layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu')
pooling = tf.keras.layers.MaxPooling2D(pool_size=(2,2))

flatten = tf.keras.layers.Flatten()
dense1 = tf.keras.layers.Dense(units=128, activation='relu')
dense2 = tf.keras.layers.Dense(units=64, activation='relu')
output_layer = tf.keras.layers.Dense(units=1, activation='sigmoid')


def _shared_branch(tensor):
    """Run one input through conv -> pool -> conv -> pool -> flatten -> dense -> dense."""
    for layer in (conv1, pooling, conv2, pooling, flatten, dense1, dense2):
        tensor = layer(tensor)
    return tensor


x1 = _shared_branch(input1)
x2 = _shared_branch(input2)

# Join both branch outputs and map them to a single sigmoid score
merged = tf.keras.layers.concatenate([x1, x2])
output = output_layer(merged)

# NOTE: this rebinds `model`, which until now referred to the BERT encoder.
model = tf.keras.models.Model(inputs=[input1, input2], outputs=output)

# Binary classification setup
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [9]:
# Hold out 20% of the rows; the fixed random_state keeps the split repeatable.
# Both vector arrays and the labels are split together so rows stay aligned.
(
    vectors1_train, vectors1_test,
    vectors2_train, vectors2_test,
    y_train, y_test,
) = train_test_split(
    vectors1,
    vectors2,
    df['label'],
    test_size=0.2,
    random_state=42,
)

In [10]:
# Fit on the training split and evaluate on the held-out split after each epoch
train_inputs = [vectors1_train, vectors2_train]
holdout = ([vectors1_test, vectors2_test], y_test)
model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=holdout,
)

# Final training
# model.fit([vectors1, vectors2], df['label'], epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f5622b6a830>

In [12]:
# Persist the trained Keras model (the current binding of `model`) to disk
model.save(filepath='bert-tiny-1k.h5')