# 1. Load Libraries and Data

In [2]:
import numpy as np
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from huggingface_hub import hf_hub_download
import json
import onnxruntime as rt

In [3]:
# Records are split on '\n' only (presumably so that a stray '\r' inside a comment body
# cannot break a row -- confirm against the raw file).
reddit_df = pd.read_csv('./data/combined_cleaned_500k.csv', lineterminator='\n', encoding='utf8')

# With lineterminator='\n', the '\r' of a '\r\n' line ending stays attached to the last
# field: the header previews as 'moderation\r'. Strip it from the column names and,
# defensively, from the string values of the last column (non-strings are left untouched).
reddit_df = reddit_df.rename(columns=lambda name: name.rstrip('\r') if isinstance(name, str) else name)
_last_col = reddit_df.columns[-1]
reddit_df[_last_col] = reddit_df[_last_col].map(
    lambda value: value.rstrip('\r') if isinstance(value, str) else value
)

In [4]:
# Preview the first five rows to sanity-check the parsed columns
reddit_df.head(n=5)

Unnamed: 0,text,timestamp,username,link,link_id,parent_id,id,subreddit_id,moderation\r
0,i think most singaporeans dont give a damn who...,2020-04-11 15:49:23,invigo79,/r/singapore/comments/fz7vtl/im_quite_interest...,t3_fz7vtl,t3_fz7vtl,fn3gbrg,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
1,fair point the secrecy aspect of it slipped my...,2020-04-03 09:59:08,potatetoe_tractor,/r/singapore/comments/fu3axm/government_to_tab...,t3_fu3axm,t1_fmasya5,fmau5k3,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
2,range,2020-02-15 15:07:03,CrossfittJesus,/r/singapore/comments/f4ac70/what_is_ps_defens...,t3_f4ac70,t3_f4ac70,fhp05xc,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
3,gt this is binary thinking because you think t...,2020-06-04 07:07:39,nomad80,/r/singapore/comments/gw55cx/notoracism/fsu4fyd/,t3_gw55cx,t1_fsu3dsf,fsu4fyd,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
4,boo boo poor u lmao,2020-10-31 13:52:12,pirorok,/r/singapore/comments/jl6abo/rsingapore_random...,t3_jl6abo,t1_gap4e9y,gap4vkl,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."


# 2. Test on the first 5 rows of data

In [5]:
# Take the first five rows (by position) as a small smoke-test sample
small_reddit_df = reddit_df.iloc[:5]

In [6]:
# Show the five-row sample that the rest of the notebook runs on
small_reddit_df

Unnamed: 0,text,timestamp,username,link,link_id,parent_id,id,subreddit_id,moderation\r
0,i think most singaporeans dont give a damn who...,2020-04-11 15:49:23,invigo79,/r/singapore/comments/fz7vtl/im_quite_interest...,t3_fz7vtl,t3_fz7vtl,fn3gbrg,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
1,fair point the secrecy aspect of it slipped my...,2020-04-03 09:59:08,potatetoe_tractor,/r/singapore/comments/fu3axm/government_to_tab...,t3_fu3axm,t1_fmasya5,fmau5k3,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
2,range,2020-02-15 15:07:03,CrossfittJesus,/r/singapore/comments/f4ac70/what_is_ps_defens...,t3_f4ac70,t3_f4ac70,fhp05xc,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
3,gt this is binary thinking because you think t...,2020-06-04 07:07:39,nomad80,/r/singapore/comments/gw55cx/notoracism/fsu4fyd/,t3_gw55cx,t1_fsu3dsf,fsu4fyd,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."
4,boo boo poor u lmao,2020-10-31 13:52:12,pirorok,/r/singapore/comments/jl6abo/rsingapore_random...,t3_jl6abo,t1_gap4e9y,gap4vkl,t5_2qh8c,"{'removal_reason': None, 'collapsed': False, '..."


# 3. Model Text Classification

In [10]:
# Download model config
# Hugging Face repo id; predict() reuses it to fetch each classifier file.
repo_path = "govtech/lionguard-v1"
config_path = hf_hub_download(repo_id=repo_path, filename="config.json")
# JSON is UTF-8 by specification; pass the encoding explicitly so parsing does not
# depend on the platform's locale default that open() would otherwise use.
with open(config_path, 'r', encoding='utf-8') as f:
    config = json.load(f)

In [9]:
# Uncomment to dump the full config: embedding settings plus, per classifier category,
# the calibration flag, the three thresholds and the ONNX model path.
#print(config)

{'description': 'Binary classifier on harmful text in Singapore context', 'embedding': {'tokenizer': 'BAAI/bge-large-en-v1.5', 'model': 'BAAI/bge-large-en-v1.5', 'max_length': 512, 'batch_size': 32}, 'classifier': {'binary': {'calibrated': True, 'threshold': {'high_recall': 0.2, 'balanced': 0.5, 'high_precision': 0.8}, 'model_type': 'ridge_classifier', 'model_fp': 'models/lionguard-binary.onnx'}, 'hateful': {'calibrated': False, 'threshold': {'high_recall': -0.341, 'balanced': -0.186, 'high_precision': -0.008}, 'model_type': 'ridge_classifier', 'model_fp': 'models/lionguard-hateful.onnx'}, 'harassment': {'calibrated': False, 'threshold': {'high_recall': -0.571, 'balanced': -0.471, 'high_precision': -0.471}, 'model_type': 'ridge_classifier', 'model_fp': 'models/lionguard-harassment.onnx'}, 'public_harm': {'calibrated': False, 'threshold': {'high_recall': -0.713, 'balanced': -0.632, 'high_precision': -0.576}, 'model_type': 'ridge_classifier', 'model_fp': 'models/lionguard-public_harm.onn

## Embedding Function (tokenises the text, loads the embedding model, and embeds the text data)

In [11]:
def get_embeddings(device, data):
    """Embed a list of texts with the model named in ``config['embedding']``.

    The tokenizer and model are loaded on every call, the model is switched to
    eval mode and moved to ``device``, and ``data`` is processed in slices of
    ``config['embedding']['batch_size']``. For each text, the first-position
    vector of the model's first output is taken and L2-normalised.

    Args:
        device: Torch device (or device string) the model and inputs are moved to.
        data: Sliceable sequence of strings; the caller in this notebook passes a list.

    Returns:
        ``np.ndarray`` with one embedding per input text, in input order.
    """
    embedding_cfg = config['embedding']

    # Set up the tokenizer and the encoder in inference mode
    tokenizer = AutoTokenizer.from_pretrained(embedding_cfg['tokenizer'])
    model = AutoModel.from_pretrained(embedding_cfg['model'])
    model.eval()
    model.to(device)

    batch_size = embedding_cfg['batch_size']
    num_batches = int(np.ceil(len(data) / batch_size))

    vectors = []
    for batch_index in range(num_batches):
        start = batch_index * batch_size
        sentences = data[start:start + batch_size]
        encoded_input = tokenizer(
            sentences,
            max_length=embedding_cfg['max_length'],
            padding=True,
            truncation=True,
            return_tensors='pt',
        )
        # The return value is not needed: the original code relies on .to() acting in place
        encoded_input.to(device)

        with torch.no_grad():
            model_output = model(**encoded_input)
            first_token = model_output[0][:, 0]

        unit_vectors = torch.nn.functional.normalize(first_token, p=2, dim=1)
        vectors.extend(unit_vectors.cpu().numpy())

    return np.array(vectors)

## Predict Function (score prediction with and without thresholds)

In [12]:
def predict(batch_text):
    """Score a batch of texts with every classifier listed in ``config['classifier']``.

    The texts are embedded once via ``get_embeddings`` (on CUDA when available,
    otherwise CPU). For each category, the ONNX model is fetched from the
    Hugging Face repo ``repo_path`` and run on the embeddings.

    Args:
        batch_text: List of strings to classify.

    Returns:
        dict keyed by category name. Each value has:
          * ``'scores'``: one score per text. For calibrated classifiers this is
            element ``[1]`` of each entry in the second model output; otherwise
            it is the flattened second model output.
          * ``'predictions'``: dict with keys ``'high_recall'``, ``'balanced'``
            and ``'high_precision'``, each a list of 0/1 flags (1 when the
            score is >= the configured threshold for that level).
    """
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = "cpu"

    embeddings = get_embeddings(device, batch_text)

    # Prepare input data as float32 for the ONNX sessions
    embeddings_df = pd.DataFrame(embeddings)
    X_input = np.array(embeddings_df, dtype=np.float32)

    threshold_levels = ('high_recall', 'balanced', 'high_precision')

    results = {}
    for category, details in config['classifier'].items():
        # Download the classifier from HuggingFace hub
        local_model_fp = hf_hub_download(repo_id=repo_path, filename=details['model_fp'])

        # Run the inference
        session = rt.InferenceSession(local_model_fp)
        input_name = session.get_inputs()[0].name
        outputs = session.run(None, {input_name: X_input})

        # If calibrated, keep only the value stored under 1 (the unsafe class)
        if details['calibrated']:
            scores = [entry[1] for entry in outputs[1]]
        else:
            scores = outputs[1].flatten()

        # Flag each score against every recommended threshold level
        cutoffs = details['threshold']
        results[category] = {
            'scores': scores,
            'predictions': {
                level: [1 if score >= cutoffs[level] else 0 for score in scores]
                for level in threshold_levels
            },
        }

    return results

## Generate results in another dataframe

In [25]:
# Pull the comment texts and ids out of the sample frame as plain lists
batch_text = small_reddit_df['text'].tolist()
batch_id = small_reddit_df['id'].tolist()

# Score every text against every classifier
results = predict(batch_text)

# Build one flat record per comment
output_data = []
for position, (comment_id, comment_text) in enumerate(zip(batch_id, batch_text)):
    output_row = {'id': comment_id, 'Text': comment_text}
    # IMPT! This loop adds 4 columns per category (32 in total). Comment it out if not needed!
    for category, outcome in results.items():
        output_row[f'{category} Score'] = outcome['scores'][position]
        # HR = high recall, B = balanced, HP = high precision
        for suffix, level in (('HR', 'high_recall'), ('B', 'balanced'), ('HP', 'high_precision')):
            output_row[f'{category} {suffix}'] = outcome['predictions'][level][position]
    output_data.append(output_row)

# Assemble the records into a DataFrame
small_results_df = pd.DataFrame(output_data)

In [26]:
# Lift pandas' column cap so none of the per-category columns are elided
pd.options.display.max_columns = None

# Plain-text dump of the full results table
print(small_results_df)

        id                                               Text  binary Score  \
0  fn3gbrg  i think most singaporeans dont give a damn who...      0.008994   
1  fmau5k3  fair point the secrecy aspect of it slipped my...      0.000000   
2  fhp05xc                                              range      0.004988   
3  fsu4fyd  gt this is binary thinking because you think t...      1.000000   
4  gap4vkl                                boo boo poor u lmao      1.000000   

   binary HR  binary B  binary HP  hateful Score  hateful HR  hateful B  \
0          0         0          0      -0.582897           0          0   
1          0         0          0      -1.116735           0          0   
2          0         0          0      -1.027191           0          0   
3          1         1          1      -0.419287           0          0   
4          1         1          1      -0.952112           0          0   

   hateful HP  harassment Score  harassment HR  harassment B  harassment H

In [27]:
# List every column of the results table: 'id', 'Text', then Score / HR / B / HP per category
print(small_results_df.columns)

Index(['id', 'Text', 'binary Score', 'binary HR', 'binary B', 'binary HP',
       'hateful Score', 'hateful HR', 'hateful B', 'hateful HP',
       'harassment Score', 'harassment HR', 'harassment B', 'harassment HP',
       'public_harm Score', 'public_harm HR', 'public_harm B',
       'public_harm HP', 'self_harm Score', 'self_harm HR', 'self_harm B',
       'self_harm HP', 'sexual Score', 'sexual HR', 'sexual B', 'sexual HP',
       'toxic Score', 'toxic HR', 'toxic B', 'toxic HP', 'violent Score',
       'violent HR', 'violent B', 'violent HP'],
      dtype='object')


## Hateful and Toxic Scores

In [28]:
# Keep only the comment id plus the raw hateful and toxic scores
condensed_small_results_df = small_results_df.loc[:, ['id', 'hateful Score', 'toxic Score']]

In [29]:
# Show the condensed table (id, hateful score, toxic score)
condensed_small_results_df

Unnamed: 0,id,hateful Score,toxic Score
0,fn3gbrg,-0.582897,-0.419336
1,fmau5k3,-1.116735,-1.86936
2,fhp05xc,-1.027191,-0.798018
3,fsu4fyd,-0.419287,1.119167
4,gap4vkl,-0.952112,1.197502


## New dataframe with hateful and toxic scores

In [30]:
# Attach the hateful/toxic scores to the original comment rows, matching on 'id'
small_hateful_and_toxic_results_df = small_reddit_df.merge(
    condensed_small_results_df,
    on='id',
    how='inner',
)

In [31]:
# Plain-text dump of the merged frame (original comment columns plus the two score columns)
print(small_hateful_and_toxic_results_df)

                                                text            timestamp  \
0  i think most singaporeans dont give a damn who...  2020-04-11 15:49:23   
1  fair point the secrecy aspect of it slipped my...  2020-04-03 09:59:08   
2                                              range  2020-02-15 15:07:03   
3  gt this is binary thinking because you think t...  2020-06-04 07:07:39   
4                                boo boo poor u lmao  2020-10-31 13:52:12   

            username                                               link  \
0           invigo79  /r/singapore/comments/fz7vtl/im_quite_interest...   
1  potatetoe_tractor  /r/singapore/comments/fu3axm/government_to_tab...   
2     CrossfittJesus  /r/singapore/comments/f4ac70/what_is_ps_defens...   
3            nomad80   /r/singapore/comments/gw55cx/notoracism/fsu4fyd/   
4            pirorok  /r/singapore/comments/jl6abo/rsingapore_random...   

     link_id   parent_id       id subreddit_id  \
0  t3_fz7vtl   t3_fz7vtl  fn3gbrg   

# 4. Quick Analysis

In [33]:
# Remove the cell-width cap so each comment is shown in full
pd.options.display.max_colwidth = None

small_hateful_and_toxic_results_df.loc[:, ['text', 'hateful Score', 'toxic Score']]

Unnamed: 0,text,hateful Score,toxic Score
0,i think most singaporeans dont give a damn who taiwan belong to,-0.582897,-0.419336
1,fair point the secrecy aspect of it slipped my mind,-1.116735,-1.86936
2,range,-1.027191,-0.798018
3,gt this is binary thinking because you think that im inherently blind and because of the majority privilege\n\ngt this demonstrates your inability to accept opposing views and have no choice but to resort to using ad hominem by casting me into an opposing the others group making it a black and white binary argument in order to have an attempt in giving supposed strength to your argument\n\nwell this is clearly very complicated for you but to bring this back to where i came in the idea of normalizing edmws tone amp tenor just because you think it is anywhere commendable that they bash anyone outside their approved groups\n\nto which all i said was \n\ngt racist against everyone sounds the worst cesspool of assholes then\n\nany other place this would be a open shut point but and here we are with your spirited defense of this mindset,-0.419287,1.119167
4,boo boo poor u lmao,-0.952112,1.197502


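Scores are raw (uncalibrated) classifier outputs: a higher score means the text is more likely positive for that metric, and a lower score means it is more likely negative. Most scores fall roughly within [-1, 1], but they are not bounded by that interval (see -1.87 and 1.20 above). Overall, hatefulness and toxicity appear to be correlated in this small sample. However, the last 2 texts say otherwise: Lionguard predicts them as not hateful, but toxic. The last 2 texts also show that the longer text scores as more hateful, but less toxic, than the shorter text.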