In [1]:
## Code adapted from:
## https://www.analyticsvidhya.com/blog/2021/12/text-classification-using-bert-and-tensorflow/

import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn import metrics

## Data: read the dataset; the 'text' and 'Islamophobic?' columns are used below
df = pd.read_csv("../Data/prelim_dataset.csv")

## Split into train and test sets, stratified on the label column so both
## parts keep the same label proportions. No test_size is passed, so the
## scikit-learn default applies (75% train / 25% test).
texts = df['text']
labels = df['Islamophobic?']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    random_state=0,
    stratify=labels,
)

## Seed TensorFlow's global RNG before any layer is created so the randomly
## initialised output layer, the dropout masks and the per-epoch shuffling
## start from the same state on every run (the train/test split is already
## pinned with random_state=0).
## NOTE(review): some GPU kernels can still be non-deterministic -- confirm
## whether bit-exact repeatability is needed.
tf.random.set_seed(0)

## Pre-trained BERT text preprocessing and encoder layers from TensorFlow Hub
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

## Bert layers: scalar string input -> preprocessing -> encoder
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

## Neural network layers: dropout on the encoder's pooled output, followed by
## a single sigmoid unit
l = tf.keras.layers.Dropout(0.1, name="dropout")(outputs['pooled_output'])
l = tf.keras.layers.Dense(1, activation='sigmoid', name="output")(l)

## Use inputs and outputs to construct a final model, then compile and train.
## NOTE(review): sigmoid + binary_crossentropy presumably expects the label
## column to hold numeric 0/1 values -- verify against the dataset.
model = tf.keras.Model(inputs=[text_input], outputs=[l])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=2, batch_size=32)
## Score the held-out texts and flatten the model output to a 1-D array
test_scores = model.predict(X_test).flatten()

## Threshold: scores strictly greater than 0.5 become 1, all others 0
y_predicted = np.where(test_scores > 0.5, 1, 0)

## Metrics
## Accuracy is printed explicitly: a bare expression in the middle of a cell
## is evaluated but never displayed (only a cell's last expression is).
print("Accuracy:", accuracy_score(y_test, y_predicted))
print("Macro:", f1_score(y_test, y_predicted, average='macro'))
print("Micro:", f1_score(y_test, y_predicted, average='micro'))
## NOTE(review): a macro F1 far below the micro F1 suggests one class is
## rarely or never predicted -- check the per-class rows of the report.
print(metrics.classification_report(y_test, y_predicted))

Epoch 1/2
Epoch 2/2
Macro: 0.4515235457063712
Micro: 0.8232323232323232
