# Setup

We will use the AdamW optimizer from [tensorflow/models](https://github.com/tensorflow/models). `tensorflow-text` is a dependency of the preprocessing for BERT inputs.

In [1]:
import os.path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as skm
import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import tensorflow_text as text  # For preprocessor
from sklearn.metrics import precision_recall_curve


# Make notebook reproducible

In [2]:
# One shared seed for the random number generators seeded in this cell.
seed = 42

# The two seeding calls are independent of each other.
np.random.seed(seed)
tf.random.set_seed(seed)


In [3]:
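# Optional: uncomment to let TensorFlow grow GPU memory on demand instead of
# reserving it all at start-up.
# physical_devices= tf.config.list_physical_devices('GPU')
# for device in physical_devices:
#     tf.config.experimental.set_memory_growth(device, True)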


# Load Dataset

## Loading the validation fold of the pre-split dataset in pandas

In [4]:
# Directory that holds the per-fold CSV files (fold_<k>.csv).
BASE_DIR = "../datasets/nepali_tweets_dataset_labelled_tweets_feb_23"


In [5]:
# Fold to evaluate; it selects both the CSV file that is read and the
# checkpoint that is restored further down.
FOLD = 5


In [6]:
# Path of the CSV file for the fold selected by FOLD.
# (os.path is imported in the notebook's import cell.)
val_filepath = os.path.join(BASE_DIR, f"fold_{FOLD}.csv")


In [7]:
# Read the selected fold into a DataFrame.
val_df = pd.read_csv(val_filepath)


In [8]:
# Preview the first rows: one "text" column followed by the label columns.
val_df.head()


Unnamed: 0,text,covid_stats,vaccination,covid_politics,humour,lockdown,civic_views,life_during_pandemic,covid_waves_and_variants
0,"देशभर थपिए २,६२२ कोरोना संक्रमित, २३ जनाको मृत्यु",1,0,0,0,0,0,0,0
1,संसदको शिक्षा तथा स्वास्थ समितिले कक्षा १२ को ...,0,1,1,0,0,0,0,0
2,ले आफ्ना हवाईयात्रुमाझ हालै गरेको सर्वेक्षणले ...,1,1,0,0,0,1,0,0
3,केही आदत के बानि पुनर्जन्म लिएर आउदा पनि बदलिद...,0,1,0,1,0,0,0,0
4,सरकारले मन्त्रीपरिषदलाई पूर्णता दिन नसक्नु र ढ...,0,1,1,0,0,1,0,0


In [9]:
# Split the frame into the model input (tweet text) and the label columns.
text_val = val_df["text"]
# Select the labels by dropping "text" by name rather than by position, so the
# split does not depend on "text" being the first column of the CSV.
label_val = val_df.drop(columns=["text"])


In [10]:
# Number of label columns, i.e. the width of the label frame.
NUM_CLASSES = label_val.shape[1]
NUM_CLASSES


8

## Create `tf.data.Dataset` from pandas dataframe

In [11]:
# Distribution strategy: the model is built inside its scope and its replica
# count determines the global batch size.
strategy = tf.distribute.MirroredStrategy()


2022-08-16 18:04:31.632478: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-16 18:04:32.487097: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 11386 MB memory:  -> device: 0, name: NVIDIA TITAN Xp, pci bus id: 0000:b4:00.0, compute capability: 6.1
2022-08-16 18:04:32.487663: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10410 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:b3:00.0, compute capability: 6.1


INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0', '/job:localhost/replica:0/task:0/device:GPU:1')


In [12]:
# Report how many replicas the strategy keeps in sync (one per GPU in the
# recorded run).
print("Number of GPUs:", strategy.num_replicas_in_sync)


Number of GPUs: 2


In [13]:
# Number of examples each replica receives per step.
BATCH_SIZE_PER_REPLICA = 32  # Decrease if OOM

# Batch size of the dataset pipeline: one per-replica batch for every replica.
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync


In [14]:
# Validation pipeline: (text, labels) pairs, batched to the global batch size
# and cached. No shuffle is applied; the evaluation cells compare the
# predictions with label_val row by row.
val_ds = (
    tf.data.Dataset.from_tensor_slices((text_val, label_val))
    .batch(GLOBAL_BATCH_SIZE)
    .cache()
)


# Loading models from TensorFlow Hub

In [15]:
# Name of the encoder family to load from TF Hub: "muril" selects MuRIL, any
# other value selects multilingual cased BERT (see the next cell).
model = "muril"


In [16]:
# Choose the TF Hub encoder together with its matching preprocessing model.
# Every value of `model` other than "muril" selects multilingual cased BERT.
if model != "muril":
    tfhub_handle_preprocess = (
        "https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3"
    )
    tfhub_handle_encoder = (
        "https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/4"
    )
else:
    tfhub_handle_preprocess = "https://tfhub.dev/google/MuRIL_preprocess/1"
    tfhub_handle_encoder = "https://tfhub.dev/google/MuRIL/1"


The model used is [MuRIL: Multilingual Representations for Indian Languages](https://arxiv.org/abs/2103.10730), a BERT model pre-trained on 17 Indian languages and their transliterated counterparts.

# Define your model

You will create a very simple fine-tuned model, with the preprocessing model, the selected BERT model, a Dropout layer, a BatchNormalization layer and one Dense layer.

In [17]:
# Rate of the Dropout layer in the classifier head; it is also embedded in the
# checkpoint path that is loaded later.
DROPOUT_RATE = 0.5


In [18]:
def build_classifier_model() -> tf.keras.Model:
    """Build the text classifier: TF Hub preprocessing + encoder + small head.

    The model takes a batch of raw strings (input named "text"), runs them
    through the preprocessing layer and the trainable encoder, and feeds the
    encoder's "pooled_output" through Dropout, BatchNormalization and a Dense
    layer with NUM_CLASSES units.

    Reads the notebook globals ``tfhub_handle_preprocess``,
    ``tfhub_handle_encoder``, ``DROPOUT_RATE`` and ``NUM_CLASSES``.

    Returns:
        A ``tf.keras.Model`` mapping the string input to one raw score per
        class. No activation is passed to the final Dense layer, so the
        outputs are not squashed into [0, 1].
    """
    text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name="text")
    preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name="preprocessing")
    encoder_inputs = preprocessing_layer(text_input)
    encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name="BERT_encoder")
    outputs = encoder(encoder_inputs)
    # Use the encoder's pooled output as the input of the classifier head.
    net = outputs["pooled_output"]
    # NOTE(review): weights are restored from a checkpoint later on, so the
    # layers of the head should stay in this order — confirm before editing.
    net = tf.keras.layers.Dropout(DROPOUT_RATE)(net)
    net = tf.keras.layers.BatchNormalization()(net)
    net = tf.keras.layers.Dense(
        NUM_CLASSES,
        name="classifier",
    )(net)
    return tf.keras.Model(text_input, net)


In [19]:
# Build the model inside the strategy scope so that it is created under the
# MirroredStrategy configured above.
with strategy.scope():
    classifier_model = build_classifier_model()


2022-08-16 18:04:42.468301: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).


In [20]:
# Epoch whose checkpoint is loaded for each fold; entry k-1 belongs to fold k.
# Presumably the best-performing epoch per fold — confirm against training logs.
BEST_EPOCHs = (3, 2, 3, 4, 3)


In [21]:
# BEST_EPOCHs is indexed from 0 while FOLD is used as a 1-based fold number.
model_epoch = BEST_EPOCHs[FOLD - 1]

# Restore the weights saved for that epoch (epoch number zero-padded to two digits).
# NOTE(review): the path hard-codes "muril" while the encoder is chosen by
# `model`; confirm a matching checkpoint exists before switching `model`.
# NOTE(review): the load status returned below is only displayed, never
# checked — consider asserting on it so a mismatched checkpoint cannot go
# unnoticed.
checkpoint_dir = f"../final_submission/muril_batchnorm_d{DROPOUT_RATE}_fold{FOLD}/checkpoints/{model_epoch:02d}"
classifier_model.load_weights(checkpoint_dir)


2022-08-16 18:04:45.881240: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 606059520 exceeds 10% of free system memory.


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fbe64568be0>

## Predicting on the validation fold

Using the classifier_model restored from the checkpoint above, compute the raw model outputs for every example in the validation fold.

In [22]:
# Raw classifier outputs for every example in val_ds, one column per label.
# The classifier head has no activation, so these are unbounded scores.
model_label_predict = classifier_model.predict(val_ds, verbose=1)


2022-08-16 18:04:48.619025: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:776] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Found an unshardable source dataset: name: "TensorSliceDataset/_2"
op: "TensorSliceDataset"
input: "Placeholder/_0"
input: "Placeholder/_1"
attr {
  key: "Toutput_types"
  value {
    list {
      type: DT_STRING
      type: DT_INT64
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: 2449
  }
}
attr {
  key: "is_files"
  value {
    b: false
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\024TensorSliceDataset:0"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
      }
      shape {
        dim {
          size: 8
        }
      }
    }
  }
}
experimental_type {
  type_id: TFT_PRODUCT
  args {
    type_id: TFT_DATASET
    args {
      type_id: TFT_PRODUCT
      args {
        type_id: TFT_TENSOR
        args {
          type_id: TFT_STRI



In [23]:
# Inspect the predicted scores.
model_label_predict


array([[ 2.4631066 , -2.1439724 , -2.1441016 , ..., -2.1448798 ,
        -2.224648  , -2.264816  ],
       [-2.6337361 ,  1.9093306 , -0.61603755, ..., -1.9338713 ,
        -2.0850768 , -2.459852  ],
       [-1.9571874 ,  1.8253846 , -2.0407062 , ..., -1.5352501 ,
        -1.7386205 , -2.1552815 ],
       ...,
       [-1.7718469 , -1.902194  , -2.4454436 , ..., -1.9008374 ,
        -0.4464903 , -1.405213  ],
       [ 1.8137903 , -2.1037557 , -2.0843458 , ..., -2.1232135 ,
        -2.2711468 , -2.3213222 ],
       [-2.4020834 , -1.9544864 , -2.4577768 , ..., -0.87320286,
        -1.7972351 , -2.1282306 ]], dtype=float32)

In [24]:
# Per-label precision-recall analysis: for every label, find the decision
# threshold (on the raw model outputs) that maximises F1 on this fold and
# record the area under the precision-recall curve.
# The curves themselves are collected in precision_list / recall_list; this
# cell does not draw anything.
precision_list = []
recall_list = []

num_labels = len(label_val.columns)
threshold_array = np.zeros(num_labels, dtype=np.float32)
f1_score_array = np.zeros(num_labels, dtype=np.float32)
auc_array = np.zeros(num_labels, dtype=np.float32)

for i, label in enumerate(label_val.columns):
    precision, recall, thresholds = precision_recall_curve(
        label_val.iloc[:, i], model_label_predict[:, i]
    )

    precision_list.append(precision)
    recall_list.append(recall)

    # The epsilon keeps the division defined where precision + recall == 0.
    f1_score = 2 * precision * recall / (precision + recall + 1e-16)

    # precision/recall carry one extra trailing point (precision=1, recall=0)
    # that has no matching entry in `thresholds`; leave it out of the search
    # so the winning index is always a valid position in `thresholds`.
    thresh_arg = f1_score[:-1].argmax()

    max_f1_score = f1_score[thresh_arg]
    max_thresh = thresholds[thresh_arg]

    auc_pr = skm.auc(recall, precision)

    threshold_array[i] = max_thresh
    f1_score_array[i] = max_f1_score
    auc_array[i] = auc_pr

    print(
        f"{label}: Max F1 Score {max_f1_score} at Threshold: {max_thresh} with AUC: {auc_pr}"
    )


covid_stats: Max F1 Score 0.9095354523227385 at Threshold: -1.574108600616455 with AUC: 0.9628050091294714
vaccination: Max F1 Score 0.9721728833629367 at Threshold: -0.24700897932052612 with AUC: 0.9855759431916088
covid_politics: Max F1 Score 0.7309458218549127 at Threshold: -1.5019621849060059 with AUC: 0.7836967894199913
humour: Max F1 Score 0.760932944606414 at Threshold: -1.1269872188568115 with AUC: 0.7878564526863515
lockdown: Max F1 Score 0.9722675367047308 at Threshold: -0.5269203186035156 with AUC: 0.9943871438124847
civic_views: Max F1 Score 0.7406749555950266 at Threshold: -0.8582739233970642 with AUC: 0.7707474013722544
life_during_pandemic: Max F1 Score 0.6024999999999999 at Threshold: -1.548545241355896 with AUC: 0.6107519681798247
covid_waves_and_variants: Max F1 Score 0.8458274398868458 at Threshold: -1.4590978622436523 with AUC: 0.9140972019627063


In [25]:
# Summary table with one row per label: best F1 and PR-AUC from the loop above.
results = pd.DataFrame(
    {
        "f1_score": f1_score_array,
        # The threshold column is left out, presumably so that results.mean()
        # only averages metrics — confirm.
        # "threshold": threshold_array,
        "auc": auc_array,
    },
    index=label_val.columns,
)


In [26]:
# Show the per-label metrics.
results


Unnamed: 0,f1_score,auc
covid_stats,0.909535,0.962805
vaccination,0.972173,0.985576
covid_politics,0.730946,0.783697
humour,0.760933,0.787856
lockdown,0.972268,0.994387
civic_views,0.740675,0.770747
life_during_pandemic,0.6025,0.610752
covid_waves_and_variants,0.845827,0.914097


In [27]:
# Unweighted mean of each metric across labels.
results.mean()


f1_score    0.816857
auc         0.851240
dtype: float32

In [28]:
# Column sums of the label frame; with 0/1 labels (as in the preview above)
# this is the number of positive examples per label.
pos = label_val.sum()
pos


covid_stats                 415
vaccination                 836
covid_politics              507
humour                      321
lockdown                    311
civic_views                 557
life_during_pandemic        340
covid_waves_and_variants    344
dtype: int64

In [29]:
# PR-AUC averaged over labels, weighting each label by its positive count.
(results["auc"] * pos).sum() / pos.sum()


0.8632353528804668

In [30]:
# Binarise the raw outputs: a label is predicted when its score reaches that
# label's tuned threshold (threshold_array broadcasts across the rows).
# NOTE(review): the thresholds were tuned on label_val itself, so the F1 values
# computed from thresh_pred against label_val are likely optimistic.
thresh_pred = model_label_predict >= threshold_array


In [31]:
# Micro-averaged F1 of the thresholded predictions.
skm.f1_score(label_val, thresh_pred, average="micro")


0.825451647183847

In [32]:
# Macro-averaged F1 of the thresholded predictions.
skm.f1_score(label_val, thresh_pred, average="macro")


0.8168571292917006

In [33]:
# Weighted-average F1 of the thresholded predictions.
skm.f1_score(label_val, thresh_pred, average="weighted")


0.8305669357221502