In [4]:
import pandas as pd
from prep_train_pipeline import prep_train_pipeline
from helper_roberta import tokenize, create_model
from transformers import RobertaTokenizerFast, TFRobertaModel
import tensorflow as tf

In [None]:
# List the GPUs TensorFlow can see; an empty list means no GPU is visible.
tf.config.list_physical_devices(device_type="GPU")

In [3]:
# Record the installed NumPy version alongside the results.
import numpy as np

print(np.__version__)

1.26.4


In [None]:
# Load the CSV and discard the "Unnamed: 0" column in one chained expression,
# then preview the first rows.
df_512_token = (
    pd.read_csv("text_512_tokens.csv")
    .drop(columns=["Unnamed: 0"])
)
df_512_token.head()

In [None]:
# Run the project's preprocessing helper on the loaded frame; it hands back
# the train/test features and labels as four values.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = prep_train_pipeline(df=df_512_token)

### looks good, but we should keep in mind that it could lead to overfitting (because of the copies), so we'll build in regularization later

In [None]:
# Load the pretrained fast tokenizer that matches the "roberta-base" checkpoint.
tokenizer = RobertaTokenizerFast.from_pretrained("roberta-base")

# Encode the train split first, then the test split, with the same tokenizer.
# `tokenize` returns a pair per split, unpacked here into input ids and
# attention masks.
(
    (train_input_ids, train_attention_masks),
    (test_input_ids, test_attention_masks),
) = (tokenize(data=split, tokenizer=tokenizer) for split in (X_train, X_test))

In [5]:
# Create the model and show its summary. Per the original author's note the
# backbone is not frozen, i.e. it is fine-tuned directly.

# Seed Python's `random`, NumPy and TensorFlow before the model is built so
# that any weights `create_model` initialises randomly and the shuffling done
# by `model.fit` are repeatable across Restart & Run All.
# NOTE: seeding alone does not make GPU kernels deterministic.
tf.keras.utils.set_random_seed(42)

# Pretrained RoBERTa encoder, loaded from the "roberta-base" checkpoint.
roberta_model = TFRobertaModel.from_pretrained("roberta-base")

# Wrap the encoder with the project's `create_model` helper and print the
# resulting layer/parameter overview.
model = create_model(roberta_model=roberta_model)
model.summary()

2024-03-26 10:51:07.019273: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-26 10:51:07.039326: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-03-26 10:51:07.039449: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 512)]                0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 512)]                0         []                            
                                                                                                  
 tf_roberta_model (TFRobert  TFBaseModelOutputWithPooli   1246456   ['input_1[0][0]',             
 aModel)                     ngAndCrossAttentions(last_   32         'input_2[0][0]']             
                             hidden_state=(None, 512, 7                                           
                             68),                                                             

In [None]:
# Drop the names that are not used again below: the raw frame, the untokenised
# feature splits, the tokenizer and the standalone encoder handle. The
# tokenised arrays, the labels and `model` stay in scope for training.
# NOTE(review): `model` presumably still references the encoder (its summary
# lists a TFRobertaModel layer), so this removes the name, not the weights.
del df_512_token
del X_train, X_test
del tokenizer, roberta_model

In [None]:
# Training callbacks: checkpointing, early stopping, learning-rate reduction
# and TensorBoard logging. The three monitoring callbacks watch `val_loss`,
# which is the Keras default and is spelled out here for clarity.

checkpoint_path = "./512_roberta_model/"
logs = "./logs/"

# keep only the best model (lowest val_loss so far) on disk
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_loss",
    save_best_only=True,
    verbose=0,
)

# stop after 5 epochs without improvement and roll back to the best weights
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
    verbose=0,
)

# after 2 epochs without improvement multiply the lr by 0.1, never below 1e-8
reduce_lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.1,
    patience=2,
    min_lr=1e-8,
)

# write event files to the log directory for inspection in TensorBoard
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=logs)

In [None]:
# Collect the callbacks and fine-tune for at most 10 epochs with batch size 16.
# NOTE(review): the test split is passed as validation data, so any callback
# that monitors validation metrics selects the model on the test set; a
# separate validation split would keep the test score unbiased.
callbacks = [model_checkpoint, early_stopping, reduce_lr_on_plateau, tensorboard]

history = model.fit(
    x=[train_input_ids, train_attention_masks],
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_data=([test_input_ids, test_attention_masks], y_test),
    callbacks=callbacks,
)