# Install libs

In [None]:
!pip install neural-structured-learning
!pip install tensorflow-text

# Import libs

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import neural_structured_learning as nsl

import tensorflow as tf
import tensorflow_text as text
import tensorflow_hub as tfh

# Drop any Keras global state left over from a previous run in this kernel.
tf.keras.backend.clear_session()

# Seed the NumPy and TensorFlow global random generators so that weight
# initialisation and other seeded random ops start from the same state on
# every "Restart & Run All". Seeding happens after clear_session() so the
# reset cannot discard it.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Read/shuffle/split

In [None]:
# Load the training and test tables from the Kaggle input directory.
# Both are read with a text column 'ctext' and a label column 'target'.
df = pd.read_csv('/kaggle/input/disaster-tweets-cleaned/df.csv')
test_df = pd.read_csv('/kaggle/input/disaster-tweets-cleaned/test_df.csv')

# Fixed seed for the split: without random_state the train/validation
# partition changes on every run, so results could not be reproduced.
SPLIT_SEED = 42

# Stratified 85/15 train/validation split on the target column.
X_tr, X_val, y_tr, y_val = train_test_split(
    df['ctext'].values, df['target'].values,
    test_size = 0.15,
    shuffle = True, stratify = df['target'],
    random_state = SPLIT_SEED,
)
X_test = test_df['ctext'].values
y_test = test_df['target'].values

# Labels become float32 column vectors of shape (n, 1), matching the
# single-unit sigmoid outputs of the models defined later.
y_tr = np.reshape(y_tr, (-1, 1)).astype(np.float32)
y_val = np.reshape(y_val, (-1, 1)).astype(np.float32)
y_test = np.reshape(y_test, (-1, 1)).astype(np.float32)

# Sanity check: texts and labels of each split must have the same length.
print(X_tr.shape, y_tr.shape)
print(X_val.shape, y_val.shape)
print(X_test.shape, y_test.shape)

# BertTokenizer/Bert

In [None]:
# Preprocessing layer from TF Hub; the models below apply it directly to
# raw string inputs and feed its output to the encoder.
preprocessor = tfh.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
)

# Small BERT encoder (L-4_H-128_A-2 handle). It is trainable so that it can be
# fine-tuned, and named 'embedder' so it can be fetched with get_layer().
embedding_handler = 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/2'
embedding_layer = tfh.KerasLayer(
    embedding_handler,
    name = 'embedder',
    trainable = True,
)

# Fine-tune BERT

In [None]:
def make_model(seq_length=40):
    """Build and compile the end-to-end text classifier used to fine-tune BERT.

    A scalar string input is passed through the module-level ``preprocessor``
    and ``embedding_layer``; the ``pooled_output`` entry of the encoder's
    output feeds a single sigmoid unit.

    Args:
        seq_length: Accepted for interface compatibility; the body never
            reads it.

    Returns:
        A ``tf.keras.Model`` compiled with binary cross-entropy, Adam at a
        learning rate of 1e-4, and the ``acc`` metric.
    """
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string)
    bert_outputs = embedding_layer(preprocessor(raw_text))
    probability = tf.keras.layers.Dense(1, activation='sigmoid')(
        bert_outputs['pooled_output']
    )

    text_classifier = tf.keras.Model(raw_text, probability)
    text_classifier.compile(
        optimizer=tf.keras.optimizers.Adam(1e-4),
        loss='binary_crossentropy',
        metrics=['acc'],
    )
    return text_classifier

# Build the classifier and fine-tune it (BERT encoder included) for three
# epochs, reporting metrics on the validation split after each epoch.
model = make_model()
model.fit(
    X_tr,
    y_tr,
    validation_data=(X_val, y_val),
    epochs=3,
)

# Get Feature extractor from Fine-Tuned Bert

In [None]:
# Wrap the fine-tuned 'embedder' layer of `model` in a stand-alone Keras model
# that maps raw strings to that layer's output dictionary.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string)
encoder_inputs = preprocessor(text_input)
embedd = model.get_layer('embedder')(encoder_inputs)
embedder = tf.keras.Model(inputs=text_input, outputs=embedd)

# Create embeddings for the data

In [None]:
def embed_in_batches(texts, chunk_size=256):
    """Return the ``pooled_output`` of ``embedder`` for every string in ``texts``.

    Calling ``embedder`` on a whole split at once pushes every example through
    BERT in a single forward pass, so activation memory grows with the size of
    the dataset. Here the texts are processed ``chunk_size`` at a time and
    only the pooled vectors are kept.

    Args:
        texts: 1-D array of strings.
        chunk_size: Number of texts sent through the model per forward pass.

    Returns:
        A NumPy array with one pooled embedding row per input text, in the
        same order as ``texts``.

    Raises:
        ValueError: If ``texts`` is empty (``np.concatenate`` has nothing to
            join).
    """
    pooled_chunks = [
        embedder(texts[start:start + chunk_size])['pooled_output'].numpy()
        for start in range(0, len(texts), chunk_size)
    ]
    return np.concatenate(pooled_chunks, axis=0)


embed_X_tr = embed_in_batches(X_tr)
embed_X_val = embed_in_batches(X_val)
embed_X_test = embed_in_batches(X_test)

# Cache the embeddings on disk; np.save appends the '.npy' extension.
np.save('X_tr', embed_X_tr)
np.save('X_val', embed_X_val)
np.save('X_test', embed_X_test)

In [None]:
# Read the three cached embedding matrices back from the working directory,
# in train / validation / test order.
embed_X_tr, embed_X_val, embed_X_test = (
    np.load(npy_file) for npy_file in ('X_tr.npy', 'X_val.npy', 'X_test.npy')
)

In [None]:
# Keys of the feature dictionaries fed to the adversarial model.
# IMAGE_INPUT_NAME is also the name of the feed-forward model's input layer;
# despite the name, that feature carries the BERT embedding vectors.
IMAGE_INPUT_NAME = "image"
LABEL_INPUT_NAME = "label"

# Number of examples per batch in the tf.data pipelines.
batch_size = 256

# Data Pipeline

In [None]:
def convert_to_tuples(features):
    """Split a feature dictionary into an ``(inputs, label)`` pair.

    Args:
        features: Mapping that contains the ``IMAGE_INPUT_NAME`` and
            ``LABEL_INPUT_NAME`` keys.

    Returns:
        A 2-tuple ``(features[IMAGE_INPUT_NAME], features[LABEL_INPUT_NAME])``.
    """
    inputs = features[IMAGE_INPUT_NAME]
    target = features[LABEL_INPUT_NAME]
    return inputs, target

def convert_to_dictionaries(image, label):
    """Pack an ``(image, label)`` pair into a feature dictionary.

    Args:
        image: Value stored under the ``IMAGE_INPUT_NAME`` key.
        label: Value stored under the ``LABEL_INPUT_NAME`` key.

    Returns:
        A dict with exactly those two keys.
    """
    features = {}
    features[IMAGE_INPUT_NAME] = image
    features[LABEL_INPUT_NAME] = label
    return features

# (embedding, label) pipelines for the baseline classifier, batched with the
# shared batch size. No shuffle step is applied here.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((embed_X_tr, y_tr))
    .batch(batch_size)
)
val_dataset = (
    tf.data.Dataset
    .from_tensor_slices((embed_X_val, y_val))
    .batch(batch_size)
)

# Base Model

In [None]:
def make_feed_forward_model():
    """Build the (uncompiled) feed-forward classifier used on the embeddings.

    The network takes a 128-dimensional float32 input named
    ``IMAGE_INPUT_NAME``, applies ReLU dense layers of 128 and 32 units, and
    ends in a single sigmoid unit.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    features = tf.keras.Input(shape=(128,), dtype='float32', name=IMAGE_INPUT_NAME)

    hidden = features
    for units in (128, 32):
        hidden = tf.keras.layers.Dense(units, activation='relu')(hidden)

    probability = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)
    return tf.keras.Model(inputs=features, outputs=probability)

# Baseline: train the plain feed-forward network on the cached embeddings.
classifier = make_feed_forward_model()
classifier.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)
classifier.fit(
    train_dataset,
    epochs=15,
    validation_data=val_dataset,
    verbose=1,
)

In [None]:
# Score the baseline classifier on the test embeddings; evaluate() returns
# the loss followed by the compiled metrics.
results = classifier.evaluate(x=embed_X_test, y=y_test)
print(results)

# Adversarial Model

In [None]:
# Adversarial-regularisation settings passed to
# nsl.keras.AdversarialRegularization: regularisation multiplier, adversarial
# step size, and the norm applied to the gradient.
adv_config = nsl.configs.make_adv_reg_config(
    adv_grad_norm='infinity',
    adv_step_size=0.5,
    multiplier=0.5,
)

In [None]:
# A fresh copy of the feed-forward network, wrapped with NSL adversarial
# regularisation. The wrapper looks up labels under LABEL_INPUT_NAME in each
# feature dictionary.
base_adv_model = make_feed_forward_model()
adv_model = nsl.keras.AdversarialRegularization(
    base_adv_model,
    adv_config=adv_config,
    label_keys=[LABEL_INPUT_NAME],
)

# The wrapped model consumes dictionaries, so each (embedding, label) pair is
# converted to a dict before batching.
train_set_for_adv_model = (
    tf.data.Dataset
    .from_tensor_slices((embed_X_tr, y_tr))
    .map(convert_to_dictionaries)
    .batch(batch_size)
)
val_set_for_adv_model = (
    tf.data.Dataset
    .from_tensor_slices((embed_X_val, y_val))
    .map(convert_to_dictionaries)
    .batch(batch_size)
)

In [None]:
# Compile and train the adversarially-regularised model on the
# dictionary-formatted datasets.
adv_model.compile(
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
    optimizer='adam',
)

adv_model.fit(
    train_set_for_adv_model,
    epochs=15,
    validation_data=val_set_for_adv_model,
)

In [None]:
# Score the network that was trained inside the adversarial wrapper on the
# same test embeddings used for the baseline.
results = base_adv_model.evaluate(x=embed_X_test, y=y_test)
print(results)

In [None]:
# Display the shape of the test embedding matrix.
embed_X_test.shape