In [None]:
# ML experiment of our moderation approach

In [None]:
#!pip install transformers==3.5.1 sentence-transformers==0.3.9
#!pip install --upgrade tensorflow-gpu

In [None]:
import pickle
import numpy as np
import pandas as pd
import tensorflow

from tqdm import tqdm, trange

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from matplotlib import pyplot as plt
import seaborn as sns

from sentence_transformers import SentenceTransformer
from sentence_transformers import models as stm

from sklearn.metrics import f1_score, balanced_accuracy_score

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so fall back to the new name when the old one
# is not available.
try:
    plt.style.use('seaborn-whitegrid')
except OSError:
    plt.style.use('seaborn-v0_8-whitegrid')

In [None]:
# load data
# nRowsRead is forwarded to read_csv as `nrows`; None places no limit on the rows read.
nRowsRead = None
df0 = pd.read_csv('labeled_data.csv', sep=',', nrows=nRowsRead)
df0.dataframeName = 'labeled_data.csv'
nRow, nCol = df0.shape
print('There are {} rows and {} columns'.format(nRow, nCol))

In [None]:
# Preview the first rows of the raw frame.
df0.head()

In [None]:
## re-structure
# Build the working frame from df0 without renaming df0 in place, so this cell
# can be re-run on its own (an in-place rename would make a second run fail
# with a KeyError on 'class').
class_names = {0: 'hate_speech', 1: 'offensive_language', 2: 'neither'}

df = pd.DataFrame({
    'text': df0['tweet'],                        # raw tweet text
    'category': df0['class'].map(class_names),   # readable class name
    'label': df0['class'],                       # numeric class id (0 / 1 / 2)
})
df.head()

In [None]:
#binary labelling task
def mapper(x):
    """Collapse the three-way class id into the binary moderation label.

    Class ids 0 and 1 (hate speech / offensive language) become 0 and class
    id 2 (neither) becomes 1. Any other value returns None.
    """
    if x in (0, 1):
        return 0
    return 1 if x == 2 else None

# Collapse the three classes into the binary target via mapper. The class id is
# re-derived from the 'category' text on every run, so re-running this cell does
# not push already-binarised labels through mapper again (which would turn every
# label into 0).
category_to_class = {'hate_speech': 0, 'offensive_language': 1, 'neither': 2}
df['label'] = df['category'].map(category_to_class).map(mapper)

In [None]:
# Class balance of the binary target: label 0 = blocked, label 1 = valid.
# minlength=2 keeps the two-way unpacking valid even if one class is absent.
blocked, valid = np.bincount(df['label'], minlength=2)
total = blocked + valid
print('Examples:\n    Total: {}\n    Valid (Neither): {} ({:.2f}% of total)\n'.format(
    total, valid, 100 * valid / total))
print('Examples:\n    Total: {}\n    Blocked (Offensive and Hate): {} ({:.2f}% of total)\n'.format(
    total, blocked, 100 * blocked / total))

In [None]:
# split data

In [None]:
# Hold out 36.5% of the row indices as the test set, stratified on the binary label.
labels_all = df.label.values
X_train_, X_test, y_train_, y_test = train_test_split(
    df.index.values,
    labels_all,
    test_size=0.365,
    random_state=42,
    stratify=labels_all,
)

In [None]:
# Split the remaining rows 50/50 into train and validation, again stratified by label.
remaining = df.loc[X_train_]
X_train, X_val, y_train, y_val = train_test_split(
    remaining.index.values,
    remaining.label.values,
    test_size=0.50,
    random_state=43,
    stratify=remaining.label.values,
)

In [None]:
# Sizes of the train / validation / test index arrays.
len(X_train), len(X_val), len(X_test)

In [None]:
# Tag every row with the split it belongs to; rows in no split keep 'not_set'.
df['data_type'] = 'not_set'
for split_name, split_index in (('train', X_train), ('val', X_val), ('test', X_test)):
    df.loc[split_index, 'data_type'] = split_name

In [None]:
# Row counts per (category, label, split) combination.
df.groupby(['category', 'label', 'data_type']).count()

In [None]:
# Training partition, selected by its split tag.
df_train = df[df["data_type"] == "train"]
df_train.head()

In [None]:
# Validation partition, selected by its split tag.
df_val = df[df["data_type"] == "val"]
df_val.head()

In [None]:
# Test partition, selected by its split tag.
df_test = df[df["data_type"] == "test"]
df_test.head()

In [None]:
# Pull the raw text and binary label arrays out of each partition.
X_train, y_train = df_train['text'].values, df_train['label'].values
X_val, y_val = df_val['text'].values, df_val['label'].values
X_test, y_test = df_test['text'].values, df_test['label'].values

In [None]:
# Data Preparation

In [None]:
# Sentence encoder used below to turn each tweet into an embedding vector.
# NOTE(review): 'distilbert-base-cased' looks like a plain transformer checkpoint
# rather than a dedicated sentence-transformers model; presumably the library adds
# a default pooling layer on top — confirm.
embedding_model = SentenceTransformer('distilbert-base-cased')

In [None]:
# Replace the raw text of each split by its embedding matrix.
# NOTE(review): the X_* names are rebound from text to embeddings here, so this
# cell must not be run twice without re-running the extraction cell first.
X_train, X_val, X_test = (
    embedding_model.encode(texts, show_progress_bar=True)
    for texts in (X_train, X_val, X_test)
)

In [None]:
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import Model

In [None]:
# One-hot encode the binary labels. num_classes is fixed to 2 so every split gets
# the two columns the 2-unit softmax output expects, even if a split happened to
# contain a single class (to_categorical otherwise infers the width from the data).
num_classes = 2
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)
y_test = to_categorical(y_test, num_classes=num_classes)

In [None]:
# define model

In [None]:
# Small dense classifier on top of the 768-dimensional sentence embeddings.
# The input tensor is called `inputs` so the Python builtin `input` is not
# shadowed, and the shape is a real 1-tuple ((768) is just the integer 768).
inputs = Input(shape=(768,))
# training=True keeps dropout active at prediction time too — presumably so that
# the repeated passes averaged in predict_with_uncertainty differ from each other.
X = Dropout(0.5)(inputs, training=True)
X = Dense(1000, activation='relu')(X)
X = Dropout(0.5)(X, training=True)
X = Dense(2, activation='softmax')(X)
model = Model(inputs=inputs, outputs=X)

In [None]:
# The head is a 2-unit softmax trained on one-hot targets, so categorical
# cross-entropy is the matching loss. For a two-class softmax it is mathematically
# the same value as binary_crossentropy, so the training objective is unchanged.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train for 15 epochs in mini-batches of 16, evaluating on the validation split each epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=16,
    epochs=15,
    validation_data=(X_val, y_val),
)

In [None]:
print(history.history.keys())
# One figure per tracked quantity ("accuracy", then "loss"): training curve
# first, validation curve second.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

In [None]:
#model.save("model")

In [None]:
# load model
#model = tensorflow.keras.models.load_model("model")

In [None]:
def predict_with_uncertainty(model, x, no_classes=2, n_iter=50):
    """Average the model's predictions over repeated forward passes.

    Parameters
    ----------
    model : object exposing ``predict(x)``; each call's result is stored in a
        ``(x.shape[0], no_classes)`` slot.
    x : array-like with a ``shape`` attribute; ``x.shape[0]`` is the number of rows.
    no_classes : width of each stored prediction row.
    n_iter : number of times ``model.predict(x)`` is called.

    Returns
    -------
    numpy.ndarray of shape ``(x.shape[0], no_classes)``: the element-wise mean
    of the ``n_iter`` predictions.
    """
    samples = np.zeros((n_iter, x.shape[0], no_classes))
    for draw in samples:
        # `draw` is a view into `samples`, so this fills one pass in place.
        draw[...] = model.predict(x)
    return samples.mean(axis=0)

In [None]:
# Averaged class scores for the test embeddings, then the highest-scoring class per row.
prediction = predict_with_uncertainty(model, X_test)
y_pred = np.argmax(prediction, axis=1)

In [None]:
# balanced accuracy (class ids recovered from the one-hot rows of y_test)
balanced_accuracy_score(np.argmax(y_test, axis=1), y_pred)

In [None]:
# f1_score, weighted across the classes (class ids recovered from the one-hot rows)
f1_score(np.argmax(y_test, axis=1), y_pred, average='weighted')

In [None]:
# create df: one row per test example
# NOTE(review): this rebinds `df`, which held the full dataset until here, so the
# earlier cells that read `df` cannot be re-run after this one without reloading.
data = dict(
    y_pred=y_pred,
    y_true=y_test.argmax(axis=1),
    unc=1 - prediction.max(axis=1),   # 1 minus the winning averaged class score
)
df = pd.DataFrame(data)
df['def'] = df['y_pred'].ne(df['y_true'])   # True where prediction and truth differ
df.head()

In [None]:
# sort by uncertainty, ascending: lowest 'unc' (most confident) rows come first
df_sort = df.sort_values(by='unc', ascending=True)

In [None]:
# calc confusion_matrix (normalize='true': each row is scaled by its true-class count)
# NOTE(review): y_pred is rebound here to the uncertainty-sorted order; the cells
# below rely on that ordering, and the cells above must not be re-run after it
# without recomputing y_pred.
pred_def, y_true, y_pred = (
    df_sort[column].values for column in ('def', 'y_true', 'y_pred')
)
cm = confusion_matrix(y_true, y_pred, normalize='true')

In [None]:
# plot confusion_matrix
labels = ['True Neg', 'False Pos', 'False Neg', 'True Pos']
categories = ['Zero', 'One']

df_cm = pd.DataFrame(cm, range(2), range(2))

# confusion_matrix orders classes ascending, so row/column 0 is label 0 (blocked:
# hate speech or offensive language) and row/column 1 is label 1 (valid).
class_names = ['Blocked', 'Valid']

sns.set(font_scale=1.6)  # scales the font used for labels and annotations
sns.heatmap(df_cm, annot=True, fmt='.2%', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            cbar=False)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

In [None]:
# Balanced accuracy with manual moderation: the `mod` least-certain rows (the tail
# of the uncertainty-sorted arrays) take the true label instead of the prediction.
# `mod` is derived from the data as 20% of the rows rather than hard-coded; it
# evaluates to the former constant 1809 for a 9,046-row test split (TODO confirm
# the split size).
moderation_fraction = 0.2
mod = int(moderation_fraction * len(y_true))
balanced_accuracy_score(y_true, np.concatenate((y_pred[:len(y_true)-mod], y_true[len(y_true)-mod:])))

In [None]:
# Confusion matrix when the last `mod` rows use the true label instead of the prediction.
n_auto = len(y_true) - mod
cm = confusion_matrix(
    y_true,
    np.concatenate((y_pred[:n_auto], y_true[n_auto:])),
    normalize='true',
)
cm

In [None]:
# plot confusion_matrix with manual moderation
array = cm

labels = ['True Neg', 'False Pos', 'False Neg', 'True Pos']
categories = ['Zero', 'One']

df_cm = pd.DataFrame(array, index=range(2), columns=range(2))

# Index 0 is label 0 (blocked), index 1 is label 1 (valid).
tick_names = ['Blocked', 'Valid']

sns.set(font_scale=1.6)  # scales the font used for labels and annotations
sns.heatmap(
    df_cm,
    annot=True,
    fmt='.2%',
    cmap='Blues',
    cbar=False,
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

In [None]:
# calc balanced accuracy for every amount of manual moderation:
# entry i is the score when the last i rows of the uncertainty-sorted arrays
# take the true label and the first (n - i) rows keep the model prediction.
n_samples = len(y_test)
mod_effort = [
    balanced_accuracy_score(
        y_true,
        np.concatenate((y_pred[:n_samples - i], y_true[n_samples - i:])),
    )
    for i in tqdm(range(n_samples + 1))
]

In [None]:
# development of the balanced accuracy
fontsize = 20

n_points = len(mod_effort)
# Six evenly spaced ticks (0% .. 100%) derived from the curve length instead of
# hard-coded dataset-specific numbers, so the axis stays right for any test size.
effort_ticks = np.linspace(0, n_points, 6)
effort_labels = ['0%', '20%', '40%', '60%', '80%', '100%']

plt.plot(mod_effort, label='Uncertainty')
# Straight reference line from the unmoderated score to a perfect score.
plt.plot([0, n_points], [mod_effort[0], 1], 'black', linestyle='dashed', label='Random')
plt.xlim((0, n_points))
# Positions, labels and font size are set in a single call.
plt.xticks(effort_ticks, effort_labels, fontsize=fontsize)
plt.yticks(np.arange(0.8, 1.05, 0.05), fontsize=fontsize)
plt.ylabel('Balanced Accuracy', fontsize=fontsize)
plt.xlabel('Moderation Effort', fontsize=fontsize)
plt.ylim((mod_effort[0], 1.0005))
legend = plt.legend(frameon=True, fontsize=fontsize, title="Moderation Strategy", fancybox=True)
plt.setp(legend.get_title(), fontsize=18)
plt.show()