# Setup

We will use the AdamW optimizer from [tensorflow/models](https://github.com/tensorflow/models). `tensorflow-text` is a dependency of the preprocessing for BERT inputs.

In [1]:
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub

import tensorflow_text as text  # For preprocessor
from official.nlp import optimization  # to create AdamW optimizer

# import matplotlib.pyplot as plt

import pandas as pd
import numpy as np


# Make notebook reproducible

In [2]:
seed = 42
tf.random.set_seed(seed)
np.random.seed(seed)


In [3]:
# physical_devices= tf.config.list_physical_devices('GPU')
# for device in physical_devices:
#     tf.config.experimental.set_memory_growth(device, True)


# Load Dataset

## Loading the dataset in pandas and doing test, train split

In [4]:
BASE_DIR = "../datasets/nepali_tweets_dataset_labelled_tweets_feb_23"


In [5]:
FOLD = 1


In [6]:
import os.path
from glob import glob

val_filepath = os.path.join(BASE_DIR, f"fold_{FOLD}.csv")

train_files = glob(os.path.join(BASE_DIR, "*.csv"))
train_files.remove(val_filepath)


In [7]:
val_df = pd.read_csv(val_filepath)


In [8]:
val_df.head()


Unnamed: 0,text,covid_stats,vaccination,covid_politics,humour,lockdown,civic_views,life_during_pandemic,covid_waves_and_variants
0,केही दिन यता वीर हस्पिटलमा कोरोना जॅाच गराउन आ...,1,0,0,0,0,1,0,0
1,भूटानले पठायो नेपालमा तीन लाख अष्ट्राजेनिकाको ...,0,1,1,0,0,0,0,0
2,चीनका चार कम्पनीले कोभिड–१९ को खोप बिक्री गर्न...,0,1,1,0,0,0,0,0
3,हिमालपारिको दुःखः ट्रयाक्टरमा खोप ढुवानी,0,1,0,0,0,0,1,0
4,"यो ट्वीट खोप लगाउनेहरुले आरटी,लगाउन तयार हुनेल...",0,1,0,1,0,0,0,0


In [9]:
train_df = pd.concat(map(pd.read_csv, train_files))


In [10]:
train_df.head()


Unnamed: 0,text,covid_stats,vaccination,covid_politics,humour,lockdown,civic_views,life_during_pandemic,covid_waves_and_variants
0,"देशभर थपिए २,६२२ कोरोना संक्रमित, २३ जनाको मृत्यु",1,0,0,0,0,0,0,0
1,संसदको शिक्षा तथा स्वास्थ समितिले कक्षा १२ को ...,0,1,1,0,0,0,0,0
2,ले आफ्ना हवाईयात्रुमाझ हालै गरेको सर्वेक्षणले ...,1,1,0,0,0,1,0,0
3,केही आदत के बानि पुनर्जन्म लिएर आउदा पनि बदलिद...,0,1,0,1,0,0,0,0
4,सरकारले मन्त्रीपरिषदलाई पूर्णता दिन नसक्नु र ढ...,0,1,1,0,0,1,0,0


In [11]:
train_df.info()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 9793 entries, 0 to 2447
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   text                      9793 non-null   object
 1   covid_stats               9793 non-null   int64 
 2   vaccination               9793 non-null   int64 
 3   covid_politics            9793 non-null   int64 
 4   humour                    9793 non-null   int64 
 5   lockdown                  9793 non-null   int64 
 6   civic_views               9793 non-null   int64 
 7   life_during_pandemic      9793 non-null   int64 
 8   covid_waves_and_variants  9793 non-null   int64 
dtypes: int64(8), object(1)
memory usage: 765.1+ KB


In [12]:
text_train = train_df["text"]
text_val = val_df["text"]


In [13]:
label_train = train_df.iloc[:, 1:]
label_val = val_df.iloc[:, 1:]


In [14]:
expected_prob = label_train.mean()
expected_prob


covid_stats                 0.164709
vaccination                 0.335035
covid_politics              0.210150
humour                      0.134586
lockdown                    0.131727
civic_views                 0.226182
life_during_pandemic        0.144797
covid_waves_and_variants    0.141019
dtype: float64

In [15]:
def others_prob(label_ds: pd.DataFrame):
    horz_sum = label_ds.sum(axis=1)
    return (horz_sum == 0).sum() / len(horz_sum)


others_prob(label_train)


0.08199734504237721

In [16]:
NUM_CLASSES = len(label_train.columns)
NUM_CLASSES


8

In [17]:
label_train.sum()


covid_stats                 1613
vaccination                 3281
covid_politics              2058
humour                      1318
lockdown                    1290
civic_views                 2215
life_during_pandemic        1418
covid_waves_and_variants    1381
dtype: int64

In [18]:
label_val.sum()


covid_stats                 413
vaccination                 803
covid_politics              500
humour                      315
lockdown                    314
civic_views                 581
life_during_pandemic        374
covid_waves_and_variants    342
dtype: int64

### Create a bias initializer

In [19]:
pos = label_train.sum()


In [20]:
neg = label_train.count() - pos
neg


covid_stats                 8180
vaccination                 6512
covid_politics              7735
humour                      8475
lockdown                    8503
civic_views                 7578
life_during_pandemic        8375
covid_waves_and_variants    8412
dtype: int64

In [21]:
bias_initializer = (np.log(pos) - np.log(neg)).to_numpy()
bias_initializer


array([-1.62359635, -0.68549837, -1.32402085, -1.86100522, -1.88577682,
       -1.22999691, -1.77600365, -1.80685138])

In [22]:
len(train_df)


9793

## Create `tf.data.Dataset` from pandas dataframe

In [23]:
strategy = tf.distribute.MirroredStrategy()


2022-08-11 11:16:37.902891: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-11 11:16:38.745417: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 11397 MB memory:  -> device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:b4:00.0, compute capability: 6.1
2022-08-11 11:16:38.745977: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10410 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:b3:00.0, compute capability: 6.1


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [24]:
print("Number of GPUs:", strategy.num_replicas_in_sync)


Number of GPUs: 2


In [25]:
BUFFER_SIZE = len(train_df)

BATCH_SIZE_PER_REPLICA = 32  # Decrease if OOM

GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync


In [26]:
train_ds = (
    tf.data.Dataset.from_tensor_slices((text_train, label_train))
    .cache()
    .shuffle(BUFFER_SIZE)
    .batch(GLOBAL_BATCH_SIZE)
    .prefetch(-1)
)


In [27]:
val_ds = (
    tf.data.Dataset.from_tensor_slices((text_val, label_val))
    .batch(GLOBAL_BATCH_SIZE)
    .cache()
)


# Loading models from TensorFlow Hub

In [28]:
model = "muril"


In [29]:
if model == "muril":
    tfhub_handle_encoder = "https://tfhub.dev/google/MuRIL/1"
    tfhub_handle_preprocess = "https://tfhub.dev/google/MuRIL_preprocess/1"
else:
    tfhub_handle_encoder = (
        "https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/4"
    )
    tfhub_handle_preprocess = (
        "https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3"
    )


The used model is [MURIL: Multilingual Representations for Indian Languages](https://arxiv.org/abs/2103.10730). A BERT model pre-trained on 17 Indian languages, and their transliterated counterparts.

# Define your model

You will create a very simple fine-tuned model, with the preprocessing model, the selected BERT model, one Dense and a Dropout layer.

In [30]:
DROPOUT_RATE = 0.4


In [31]:
def build_classifier_model():
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="text")
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name="preprocessing")
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name="BERT_encoder")
    outputs = encoder(encoder_inputs)
    net = outputs["pooled_output"]
    net = tf.keras.layers.Dropout(DROPOUT_RATE)(net)
    net = tfa.layers.InstanceNormalization()(net)
    net = tf.keras.layers.Dense(
        NUM_CLASSES,
        bias_initializer=tf.keras.initializers.Constant(bias_initializer),
        name="classifier",
    )(net)
    return tf.keras.Model(text_input, net)


In [32]:
# classifier_model = build_classifier_model()


In [33]:
# pred = classifier_model(tf.constant(text_test))
# print(tf.sigmoid(pred))


In [34]:
# tf.keras.utils.plot_model(classifier_model)


# Model training

You now have all the pieces to train a model, including the preprocessing module, BERT encoder, data, and classifier.

## Loss function

### Needs a change

Since this is a multilabel classification problem and the model outputs a probability (a single-unit layer), you'll use [`losses.BinaryCrossentropy`](https://www.tensorflow.org/api_docs/python/tf/keras/losses/BinaryCrossentropy) loss function.

In [35]:
def get_loss_metrics():
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    metrics = [
        tf.metrics.BinaryAccuracy(),
        tfa.metrics.F1Score(num_classes=NUM_CLASSES, average="weighted", threshold=0.5),
        tf.keras.metrics.AUC(
            len(label_val),
            curve="PR",
            multi_label=True,
            num_labels=NUM_CLASSES,
            from_logits=True,
        ),
    ]

    return loss, metrics


## Optimizer

For fine-tuning, let's use the same optimizer that BERT was originally trained with: the "Adaptive Moments" (Adam). This optimizer minimizes the prediction loss and does regularization by weight decay (not using moments), which is also known as [AdamW](https://arxiv.org/abs/1711.05101).

For the learning rate (`init_lr`), you will use the same schedule as BERT pre-training: linear decay of a notional initial learning rate, prefixed with a linear warm-up phase over the first 10% of training steps (`num_warmup_steps`). In line with the BERT paper, the initial learning rate is smaller for fine-tuning (best of 5e-5, 3e-5, 2e-5).

In [36]:
EPOCHS = 9
INIT_LR = 5e-5  # best of 5e-5, 3e-5, 2e-5


def get_optimizer():
    steps_per_epoch = int(tf.data.experimental.cardinality(train_ds))
    num_train_steps = steps_per_epoch * EPOCHS
    num_warmup_steps = int(0.1 * num_train_steps)

    optimizer = optimization.create_optimizer(
        init_lr=INIT_LR,
        num_train_steps=num_train_steps,
        num_warmup_steps=num_warmup_steps,
        optimizer_type="adamw",
    )
    return optimizer


## Callbacks

In [37]:
project_name = "final_submission"
MODEL_BASE_DIR = f"../{project_name}/{model}_instancenorm_d{DROPOUT_RATE}_fold{FOLD}/"
LOG_DIR = MODEL_BASE_DIR + "runs"
CHECKPOINT_DIR = MODEL_BASE_DIR + "checkpoints/{epoch:02d}"


In [38]:
import wandb
from wandb.keras import WandbCallback

wandb.init(project=project_name, entity="quarks")
wandb.config = {
    "init_lr": INIT_LR,
    "epochs": EPOCHS,
    "batch_size": GLOBAL_BATCH_SIZE,
    "dropout_rate": DROPOUT_RATE,
}


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mrabinadk1[0m ([33mquarks[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [39]:
monitor = "val_auc"
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor=monitor, patience=5, verbose=1),
    tf.keras.callbacks.ModelCheckpoint(
        filepath=CHECKPOINT_DIR,
        save_weights_only=True,
        verbose=1,
    ),
    WandbCallback(monitor=monitor, save_weights_only=True),
]




## Loading the BERT model and training

Using the classifier_model you created earlier, you can compile the model with the loss, metric and optimizer.

In [40]:
with strategy.scope():
    classifier_model = build_classifier_model()
    loss, metrics = get_loss_metrics()
    optimizer = get_optimizer()
    classifier_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

2022-08-11 11:17:04.319351: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


In [41]:
tf.keras.utils.plot_model(classifier_model)


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model/model_to_dot to work.


In [42]:
classifier_model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 preprocessing (KerasLayer)     {'input_word_ids':   0           ['text[0][0]']                   
                                (None, 128),                                                      
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128)}                                                      

In [43]:
!nvidia-smi

Thu Aug 11 11:17:14 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:B3:00.0 Off |                  N/A |
| 49%   35C    P2    65W / 250W |  10681MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA TITAN Xp     Off  | 00000000:B4:00.0 Off |                  N/A |
| 23%   42C    P2    67W / 250W |  11697MiB / 12288MiB |      0%      Default |
|       

In [44]:
print(f"Training model with {tfhub_handle_encoder}")
history = classifier_model.fit(
    x=train_ds, validation_data=val_ds, epochs=EPOCHS, callbacks=callbacks
)


Training model with https://tfhub.dev/google/MuRIL/1


2022-08-11 11:17:15.636387: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_2"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
      type: DT_INT64
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 9793
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
      shape {
        dim {
          size: 8
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRI

Epoch 1/9
INFO:tensorflow:batch_all_reduce: 202 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 202 all-reduces with algorithm = nccl, num_packs = 1






INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').


INFO:tensorflow:batch_all_reduce: 202 all-reduces with algorithm = nccl, num_packs = 1


INFO:tensorflow:batch_all_reduce: 202 all-reduces with algorithm = nccl, num_packs = 1






INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:GPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1').




2022-08-11 11:19:36.234164: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_2"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
      type: DT_INT64
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 2448
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:5"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
      shape {
        dim {
          size: 8
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRI


Epoch 1: saving model to ../final_submission/muril_instancenorm_d0.4_fold1/checkpoints/01


2022-08-11 11:19:48.923959: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.
2022-08-11 11:19:49.460944: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.
2022-08-11 11:19:50.003597: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.
2022-08-11 11:19:52.853850: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.


INFO:tensorflow:Assets written to: /mnt/SSD0/rabin/EpiSuS/training/notebooks/wandb/run-20220811_111655-4ecajvs8/files/model-best/assets


INFO:tensorflow:Assets written to: /mnt/SSD0/rabin/EpiSuS/training/notebooks/wandb/run-20220811_111655-4ecajvs8/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/mnt/SSD0/rabin/EpiSuS/training/notebooks/wandb/run-20220811_111655-4ecajvs8/files/model-best)... Done. 7.9s


Epoch 2/9
Epoch 2: saving model to ../final_submission/muril_instancenorm_d0.4_fold1/checkpoints/02
Epoch 3/9
Epoch 3: saving model to ../final_submission/muril_instancenorm_d0.4_fold1/checkpoints/03
Epoch 4/9
Epoch 4: saving model to ../final_submission/muril_instancenorm_d0.4_fold1/checkpoints/04
Epoch 5/9
Epoch 5: saving model to ../final_submission/muril_instancenorm_d0.4_fold1/checkpoints/05
Epoch 6/9

## Evaluate the model

Let's see how the model performs

In [None]:
# loss, accuracy, f1_score, auc = classifier_model.evaluate(val_ds)

# print(f"Loss: {loss}")
# print(f"Accuracy: {accuracy}")
# print(f"Weighted F1 Score at 0.5 Threshold: {f1_score}")
# print(f"AUC: {auc}")


## Plot the accuracy and loss over time

Based on the `History` object returned by `model.fit()`. You can plot the training and validation loss for comparison, as well as the training and validation accuracy:

In [None]:
# history_dict = history.history
# print(history_dict.keys())

# loss = history_dict["loss"]
# val_loss = history_dict["val_loss"]

# acc = history_dict["binary_accuracy"]
# val_acc = history_dict["val_binary_accuracy"]

# f1_score = history_dict["f1_score"]
# val_f1_score = history_dict["val_f1_score"]

# auc = history_dict["auc"]
# val_auc = history_dict["val_auc"]

# epochs = range(1, len(acc) + 1)
# fig = plt.figure(figsize=(10, 10))
# fig.tight_layout()

# plt.subplot(2, 2, 1)
# # r is for "solid red line"
# plt.plot(epochs, loss, "r", label="Training loss")
# # b is for "solid blue line"
# plt.plot(epochs, val_loss, "b", label="Validation loss")
# plt.title("Training and validation loss")
# # plt.xlabel('Epochs')
# plt.ylabel("Loss")
# plt.legend()

# plt.subplot(2, 2, 2)
# plt.plot(epochs, acc, "r", label="Training acc")
# plt.plot(epochs, val_acc, "b", label="Validation acc")
# plt.title("Training and validation accuracy")
# # plt.xlabel('Epochs')
# plt.ylabel("Accuracy")
# plt.legend()

# plt.subplot(2, 2, 3)
# plt.plot(epochs, f1_score, "r", label="Training f1_score")
# plt.plot(epochs, val_f1_score, "b", label="Validation f1_score")
# plt.title("Training and validation f1_score")
# # plt.xlabel('Epochs')
# plt.ylabel("f1_score")
# plt.legend()

# plt.subplot(2, 2, 4)
# plt.plot(epochs, auc, "r", label="Training auc")
# plt.plot(epochs, val_auc, "b", label="Validation auc")
# plt.title("Training and validation AUC")
# plt.xlabel("Epochs")
# plt.ylabel("AUC")
# plt.legend()


In this plot, the red lines represent the training loss and accuracy, and the blue lines are the validation loss and accuracy.