# SOCIAL BIAS INDICATOR

#### Pooja Kangokar Pranesh
#### DS 690: Introduction to Natural Language Processing

#### Imports

In [None]:
# Mount Google Drive at /content/gdrive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')

# Install the third-party packages imported further down (-qq keeps pip output quiet).
!pip install -qq transformers
!pip install -qq gradio

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

from google.colab import drive
from torch import nn
from torch.optim import Adam
from transformers import GPT2Model, GPT2Tokenizer
from tqdm import tqdm

from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

import gradio as gr

#### Standard Model

In [None]:
class SimpleGPT2SequenceClassifier(nn.Module):
    """Sequence classifier: a pretrained GPT-2 backbone plus one linear layer.

    The backbone output for each example is flattened into a single vector
    and projected to ``num_classes`` logits. Because the linear layer expects
    ``hidden_size * max_seq_len`` input features, the flattened backbone
    output must have exactly that many values per example.
    """

    def __init__(self, hidden_size: int, num_classes: int, max_seq_len: int, gpt_model_name: str):
        super().__init__()
        # Pretrained backbone, looked up by name (the caller in this file passes "gpt2").
        self.gpt2model = GPT2Model.from_pretrained(gpt_model_name)
        # Classification head over the flattened backbone output.
        flattened_features = hidden_size * max_seq_len
        self.fc1 = nn.Linear(flattened_features, num_classes)

    def forward(self, input_id, mask):
        """Return the raw (un-normalised) class scores for a batch.

        ``input_id`` and ``mask`` are forwarded to the backbone as
        ``input_ids`` and ``attention_mask``.
        """
        # With return_dict=False the backbone returns a tuple; only its first
        # element is used (assumed to be (batch, seq, hidden) -- TODO confirm).
        hidden_states, _ = self.gpt2model(
            input_ids=input_id, attention_mask=mask, return_dict=False
        )
        # Collapse everything after the batch dimension into one feature axis.
        flattened = hidden_states.view(hidden_states.shape[0], -1)
        return self.fc1(flattened)

#### Prediction 

In [None]:
def predict_hasbiasornot(text, labels_map_v, model_path):
    """Classify ``text`` with a fine-tuned GPT-2 checkpoint and return its label.

    Args:
        text: Raw input string (post, comment, tweet, sentence).
        labels_map_v: Mapping from predicted class index to the value returned.
        model_path: Path of the saved ``state_dict``. A path containing
            ``'Category'`` selects the 50-class head; any other path selects
            the 2-class head.

    Returns:
        ``labels_map_v[i]`` where ``i`` is the index of the largest model output.
    """
    max_seq_len = 128  # shared by the classifier head and the tokenizer padding
    num_classes = 50 if 'Category' in model_path else 2

    model = SimpleGPT2SequenceClassifier(
        hidden_size=768,
        num_classes=num_classes,
        max_seq_len=max_seq_len,
        gpt_model_name="gpt2",
    )
    # Inference below runs on CPU, so remap the checkpoint tensors to CPU while
    # loading; otherwise a GPU-saved checkpoint cannot be read on a CPU-only runtime.
    model.load_state_dict(torch.load(model_path, map_location="cpu"))
    model.eval()

    # Lower-case the text and collapse every run of whitespace to one space.
    fixed_text = " ".join(text.lower().split())

    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
    tokenizer.padding_side = "left"
    # Reuse the end-of-sequence token as the padding token.
    tokenizer.pad_token = tokenizer.eos_token
    # Always pad/truncate to max_seq_len: the classifier head has a fixed input width.
    model_input = tokenizer(
        fixed_text,
        padding='max_length',
        max_length=max_seq_len,
        truncation=True,
        return_tensors="pt",
    )

    mask = model_input['attention_mask'].cpu()
    input_id = model_input["input_ids"].squeeze(1).cpu()

    # No gradients are needed for prediction; skip building the autograd graph.
    with torch.no_grad():
        output = model(input_id, mask)

    return labels_map_v[output.argmax(dim=1).item()]

#### Labels for each model prediction

In [None]:
# Class index -> label for the offensive model.
labels_map_o = {0: "not offensive", 1: "offensive"}

# Class index -> label for the biased model.
# NOTE(review): index 0 is the "biased" class here, the reverse polarity of the
# offensive map above -- presumably mirrors the training label encoding; confirm.
labels_map_b = {0: "biased", 1: "unbiased"}

# Category-label string -> class index for the category (stereotype) model.
# Each key is the textual form of a list of category names.
labels_map_c = {
    '[]': 0,
    '["social"]': 1,
    '["culture"]': 2,
    '["culture", "gender"]': 3,
    '["gender"]': 4,
    '["disabled"]': 5,
    '["race"]': 6,
    '["victim"]': 7,
    '["gender", "race"]': 8,
    '["body"]': 9,
    '["culture", "race"]': 10,
    '["culture", "social"]': 11,
    '["gender", "victim"]': 12,
    '["disabled", "race"]': 13,
    '["race", "victim"]': 14,
    '["race", "social"]': 15,
    '["gender", "social"]': 16,
    '["culture", "victim"]': 17,
    '["body", "gender"]': 18,
    '["culture", "disabled"]': 19,
    '["body", "race"]': 20,
    '["culture", "race", "victim"]': 21,
    '["disabled", "gender"]': 22,
    '["social", "victim"]': 23,
    '["gender", "race", "victim"]': 24,
    '["body", "culture"]': 25,
    '["body", "disabled"]': 26,
    '["body", "victim"]': 27,
    '["disabled", "victim"]': 28,
    '["culture", "gender", "victim"]': 29,
    '["body", "culture", "race"]': 30,
    '["body", "social"]': 31,
    '["culture", "disabled", "race"]': 32,
    '["race", "social", "victim"]': 33,
    '["disabled", "gender", "victim"]': 34,
    '["disabled", "social"]': 35,
    '["culture", "gender", "race"]': 36,
    '["body", "gender", "victim"]': 37,
    '["disabled", "gender", "race"]': 38,
    '["gender", "race", "social"]': 39,
    '["gender", "social", "victim"]': 40,
    '["culture", "gender", "social"]': 41,
    '["culture", "race", "social"]': 42,
    '["body", "gender", "social"]': 43,
    '["culture", "gender", "race", "social"]': 44,
    '["culture", "social", "victim"]': 45,
    '["body", "gender", "race"]': 46,
    '["body", "disabled", "race"]': 47,
    '["culture", "disabled", "victim"]': 48,
    '["body", "disabled", "social"]': 49,
}

# Inverse lookup (class index -> category-label string), the direction needed
# to turn a predicted index back into a readable label.
labels_test_c = {index: label for label, index in labels_map_c.items()}

#### Social Bias Indicator

In [None]:
def social_bias_indicator(model_type, input_text):
    """Run the selected classifier on ``input_text`` and return its label.

    Args:
        model_type: One of ``'Offensive'``, ``'Category'`` or
            ``'BiasImplication'``. Letter case and surrounding whitespace
            are ignored.
        input_text: The post/comment/tweet/sentence to classify.

    Returns:
        The label produced by ``predict_hasbiasornot`` for the chosen model,
        or a message listing the valid model types when ``model_type`` is
        not recognised.
    """
    model_dir = "/content/gdrive/My Drive/Colab Notebooks/models"
    known_types = ("Offensive", "Category", "BiasImplication")

    # Normalise the request so a stray space or different capitalisation
    # still selects the intended model.
    requested = str(model_type or "").strip().lower()
    canonical = next(
        (name for name in known_types if name.lower() == requested), None
    )
    if canonical is None:
        # Report the problem instead of silently returning None.
        return (
            f"Unknown model type {model_type!r}. "
            f"Choose one of: {', '.join(known_types)}."
        )

    # Index -> label map matching each checkpoint.
    labels_by_type = {
        "Offensive": labels_map_o,
        "Category": labels_test_c,
        "BiasImplication": labels_map_b,
    }
    # Checkpoints are stored as <model_dir>/<model type>.pt
    return predict_hasbiasornot(
        input_text, labels_by_type[canonical], f"{model_dir}/{canonical}.pt"
    )


1. I thought drugs were the only things black people could shoot up Boy was I wrong

2.  I hate fat bitches

3.  ”Black guy in class: *attempts to throw a paper ball into the trash* *misses* Mrs. Mundy: ”You’re a disgrace to your race, Marcus”

4. All-female casts bother me but I have literally never noticed the hundreds of movies with 0 female characters I have already watched

5. Amercians hate Immigrants.

6. You annoyed me every day for months you’re a fucking moron

7. "Love is like the wind, you can't see it but you can feel it."





In [None]:
# Build the Gradio UI: two inputs (model choice and text) and one text output.

# The model type is a closed set, so offer a dropdown instead of free text;
# a mistyped name would match none of the branches in social_bias_indicator.
Model_type = gr.Dropdown(
    choices=["Offensive", "Category", "BiasImplication"],
    value="Offensive",
    label="Select the Model Type",
)
input_text = gr.Textbox(label="Enter the Post/Comment/Tweet/Sentence")
# Text box that displays the predicted label.
output = gr.Textbox()

app = gr.Interface(fn=social_bias_indicator, inputs=[Model_type, input_text], outputs=output)
# share=True also serves the app through a temporary public URL.
app.launch(share=True)

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://4355be877f26e20a.gradio.app

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


