In [None]:
import warnings

# Install a catch-all "ignore" filter so no Python warning is shown in cell output
warnings.simplefilter("ignore")

In [None]:
# Install dependencies 
# A dependency of the preprocessing for BERT inputs
!pip install -q -U tensorflow-text
# A dependency for using the AdamW optimizer
!pip install -q tf-models-official 

# Load necessary modules 
from sklearn.model_selection import train_test_split 
import shutil 
import os
import tensorflow as tf 
import numpy as np 
import pandas as pd 
import tensorflow_hub as hub 
import tensorflow_text as text 
from official.nlp import optimization 
import matplotlib.pyplot as plt 

# Only let messages of level ERROR and above through TensorFlow's Python logger
tf.get_logger().setLevel('ERROR')

In [None]:
# Read the competition training file and keep just the tweet text and its label
train_df = pd.read_csv(
    '/kaggle/input/nlp-getting-started/train.csv'
)[['text', 'target']]

# Hold out 12% of the labelled rows for validation.
# NOTE: despite their names, X_test / y_test are the validation split.
X_train, X_test, y_train, y_test = train_test_split(
    train_df['text'],
    train_df['target'],
    test_size=0.12,
    random_state=42,
    shuffle=True,
)

## **Basic information about the model and preprocessing:**
* The weights of this model are those released by the original BERT authors. 
* This model has been pre-trained for English on Wikipedia and BooksCorpus. 

* Text inputs have been lower-cased before tokenization into word pieces, and any accent markers have been stripped.
* For training, random input masking has been applied independently to word pieces (as in the original BERT paper).

In [None]:
# Build the model using Keras functional API 
def build_model(
    preprocess_url="https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3",
    encoder_url="https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4",
    dropout_rate=0.1,
):
  """Build a binary text classifier on top of a TensorFlow Hub BERT encoder.

  The model takes raw strings, runs them through the hub preprocessing layer
  and the (trainable) hub encoder, and feeds the encoder's ``pooled_output``
  through dropout into a single sigmoid unit.

  Args:
    preprocess_url: TF Hub handle of the text preprocessing model. Defaults to
      the uncased English BERT preprocessor.
    encoder_url: TF Hub handle of the encoder. It must accept the output of
      ``preprocess_url`` and return a mapping with a ``'pooled_output'`` entry.
      Defaults to uncased English BERT-base (L-12, H-768, A-12).
    dropout_rate: Dropout rate applied to the pooled output before the
      classification layer.

  Returns:
    An uncompiled ``tf.keras.Model`` mapping a batch of strings to one sigmoid
    output per string.
  """
  # Raw text input: shape=() means every example is a single scalar string
  input_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
  # Load the preprocessing layer from TensorFlow Hub and apply it
  preprocessing_layer = hub.KerasLayer(preprocess_url, name='preprocessing_layer')
  processed_text = preprocessing_layer(input_text)
  # Load the encoder from TensorFlow Hub; trainable=True so it is fine-tuned too
  encoder = hub.KerasLayer(encoder_url, trainable=True, name='encoder')
  outputs = encoder(processed_text)
  # The BERT model returns a map with 3 keys: pooled_output, sequence_output, encoder_outputs.
  # For fine-tuning we use pooled_output, which represents each input text
  # as a whole with a single embedding vector.
  x = outputs['pooled_output']
  # Apply Dropout to reduce overfitting
  x = tf.keras.layers.Dropout(dropout_rate)(x)
  # Classification head: one sigmoid unit for the binary target
  x = tf.keras.layers.Dense(1, activation='sigmoid', name='classifier')(x)
  return tf.keras.Model(input_text, x)

In [None]:
# Binary cross-entropy is the loss minimised during fine-tuning
loss = tf.keras.losses.BinaryCrossentropy()
# Binary accuracy is the metric reported for training and validation
metrics = tf.metrics.BinaryAccuracy()
# Instantiate the classifier
model = build_model()
# Draw the layer graph as this cell's output
tf.keras.utils.plot_model(model)

In [None]:
epochs = 2
# Keep in sync with the batch_size passed to model.fit
batch_size = 16

# The optimizer takes one step per batch, so the schedule length has to be
# counted in batches of the training split (X_train), not in rows of train_df.
# Counting rows makes the schedule far longer than the real run, so the
# learning rate would still be warming up when training ends.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))
num_train_steps = steps_per_epoch * epochs
# Warm up over the first 10% of the optimizer steps
num_warmup_steps = int(0.1*num_train_steps)

init_lr = 3e-5

#For fine-tuning, we use the same optimizer that BERT was originally trained with.
#This optimizer minimizes the prediction loss and does regularization by weight decay (aka AdamW).
optimizer = optimization.create_optimizer(init_lr=init_lr,
                                          num_train_steps=num_train_steps,
                                          num_warmup_steps=num_warmup_steps,
                                          optimizer_type='adamw')

model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

In [None]:
# Fine-tune on the training split and evaluate on the held-out split after every epoch
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=16,
    validation_data=(X_test, y_test),
)

In [None]:
# Persist the fine-tuned model together with its optimizer state
model.save('/kaggle/working/classifier_model', include_optimizer = True)

# Read the unlabelled competition test set
test_df = pd.read_csv('/kaggle/input/nlp-getting-started/test.csv')

# Score every test tweet, drop the trailing axis of size 1 from the model
# output, and round each sigmoid score to the nearest integer (0.0 or 1.0)
predictions = np.rint(
    tf.squeeze(model.predict(test_df['text']), axis = 1)
)

In [None]:
def plot_loss_curves(history):
    '''
    Plot loss and accuracy curves for a training run.

    If validation metrics were recorded ("val_loss" is present), two figures
    are drawn: one with training/validation loss and one with
    training/validation accuracy. Otherwise a single figure shows training
    accuracy and training loss on twin y-axes.

    Parameters
    ----------
    history : object with a ``history`` attribute
        ``history.history`` must be a dict of per-epoch metric lists holding
        "loss" and either "binary_accuracy" or "accuracy" (and the matching
        "val_"-prefixed entries when "val_loss" is present).

    Returns
    -------
    None
        The figures are created through matplotlib's pyplot interface.

    Raises
    ------
    KeyError
        If a required metric is missing from ``history.history``.
    '''
    logs = history.history

    # The accuracy entry is named after the metric the model was compiled with,
    # so accept both spellings instead of hard-coding a different one per branch
    accuracy_key = next(
        (key for key in ("binary_accuracy", "accuracy") if key in logs), None
    )
    if accuracy_key is None:
        raise KeyError("no 'binary_accuracy' or 'accuracy' entry in history.history")

    loss = logs["loss"]
    accuracy = logs[accuracy_key]
    epochs = range(len(loss)) #number of epochs

    if "val_loss" in logs:
        val_loss = logs["val_loss"]
        val_accuracy = logs["val_" + accuracy_key]

        # Plot losses (figsize must be passed to figure(); assigning
        # plt.figsize has no effect)
        plt.figure(figsize=(10, 7))
        plt.plot(epochs, loss, label = 'training_loss')
        plt.plot(epochs, val_loss, label = 'val_loss')
        plt.title('loss')
        plt.xlabel('epochs')
        plt.legend()

        # Plot accuracy
        plt.figure()
        plt.plot(epochs, accuracy, label = 'training_accuracy')
        plt.plot(epochs, val_accuracy, label = 'val_accuracy')
        plt.title('accuracy')
        plt.xlabel('epochs')
        plt.legend()

    else:
        # Plot training loss and accuracy together
        _, ax1 = plt.subplots(figsize=(11, 9))
        accuracy_lines = ax1.plot(epochs, accuracy, label = 'training_accuracy')
        ax1.set_xlabel('epochs')
        ax1.set_ylabel('Training Accuracy')

        ax2 = ax1.twinx()
        loss_lines = ax2.plot(epochs, loss, label = 'training_loss', color = 'tab:red')
        ax2.set_ylabel('Training Loss')

        # The two curves live on different axes; merge their handles into one legend
        ax1.legend(handles=list(accuracy_lines) + list(loss_lines))
        
# Draw the curves for the fine-tuning run recorded by model.fit
plot_loss_curves(history=history)

In [None]:
# Attach the rounded predictions to the test frame as integer labels
test_df['target'] = np.asarray(predictions).astype(int)
# The submission file contains only the tweet id and its predicted label
submission = test_df.loc[:, ['id', 'target']]
submission.to_csv('submission.csv', index = False)
submission