# Testing Instruction Fine-Tuned LLMS
This notebook is made to offer means for testing the hand-crafted prompting method on a selection of Instruction Fine-Tuned LLMs.

## Disclaimer
This notebook is made to recreate Plaza-Del-Arco, F., Nozza, D., & Hovy, D. (2023). Respectful or Toxic? Using Zero-Shot Learning with Language Models to Detect Hate Speech. Retrieved from https://aclanthology.org/2023.woah-1.6.pdf

## Instruction Fine-Tuned Model
In this section, we try the instruction fine-tuned models with their prompt template.

In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification
from sklearn.metrics import f1_score
import pandas as pd
from tqdm.notebook import tqdm

In [None]:
from google.colab import drive
drive.mount('/content/drive')

### Load Model and Tokenizer
You cann load either Flan-T5 or mT-0. Run the cell of the desired model. By default if you run the notebook, Flan-T5 is loaded.

In [2]:
model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-xl")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")

config.json:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.45G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [None]:
if not model:
    model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-base")
    tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-base")

### Setting the Prompt Template

In [3]:
prompt_template = "Classify this text as hate or non-hate. Text:"
output_indicator = "Answer:"

### Loading Test Data

In [4]:
mhs = pd.read_csv('/content/drive/MyDrive/mhs_preprocessed_data.csv')
mhs.describe()

Unnamed: 0,HS
count,135556.0
mean,0.361828
std,0.480531
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [13]:
hateeval = pd.read_csv('/content/drive/MyDrive/hateeval_preprocessed_data.csv')
hateeval.describe()

Unnamed: 0,HS
count,10000.0
mean,0.421
std,0.493744
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [18]:
labels = {
    "0": "Non-Hate",
    "1": "Hate",
}

### Concatenate Prompt Template to Input Samples

In [5]:
def concat_prompt_template(df_column):
    return df_column.apply(lambda x: f"{prompt_template} {x}. {output_indicator}")

In [6]:
mhs['input'] = concat_prompt_template(mhs['text'])
mhs_text_data = mhs['input'].astype("str").tolist()

In [14]:
hateeval['input'] = concat_prompt_template(hateeval['text'])
hateeval_text_data = hateeval['input'].astype("str").tolist()

### Tokenize and Predict

In [7]:
model.to('cuda')

T5ForConditionalGeneration(
  (shared): Embedding(32128, 2048)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 2048)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=2048, out_features=2048, bias=False)
              (k): Linear(in_features=2048, out_features=2048, bias=False)
              (v): Linear(in_features=2048, out_features=2048, bias=False)
              (o): Linear(in_features=2048, out_features=2048, bias=False)
              (relative_attention_bias): Embedding(32, 32)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=2048, out_features=5120, bias=False)
              (wi_1): Linear(in_features=2048, out_features=5120, bias=False)
       

In [8]:
import torch
batch_size=int(torch.cuda.get_device_properties(0).total_memory/1e9+.5)*16
print(f'GPU mem:{int(torch.cuda.get_device_properties(0).total_memory/1e9+.5)}G, batch_size:{batch_size}')

GPU mem:16G, batch_size:256


In [10]:
batch_size = 16

In [16]:
def batch_inference(data_list):
    decoded = list()
    for i in tqdm(range(0, len(data_list)-batch_size, batch_size)):
        inputs = tokenizer(data_list[i: i+batch_size], return_tensors="pt", padding=True, truncation=True)
        inputs = {key: value.to('cuda:0') for key, value in inputs.items()}
        sequences = model.generate(**inputs, do_sample=True, min_length=0, max_length=10, temperature=0.001)
        decoded.extend(tokenizer.batch_decode(sequences, skip_special_tokens=True))
    return decoded
decoded = batch_inference(hateeval_text_data)

  0%|          | 0/624 [00:00<?, ?it/s]

In [19]:
filename = 'flan_t5_results.csv'
file_object = open(filename, 'w')
for i, out in enumerate(decoded):
    file_object.write(out)
    file_object.write(',')
    file_object.write(labels[str(hateeval["HS"][i])])
    file_object.write('\n')
file_object.close()

In [20]:
results = pd.read_csv(f"/content/drive/MyDrive/{filename}")
results.columns = [ "truth","output"]
results.head()

Unnamed: 0,truth,output
0,Non-hate,Hate Speech
1,Hate,Hate Speech
2,Non-hate,Non-Hate
3,Hate,Non-Hate
4,Non-hate,Hate Speech


### Answer Mapping

In [2]:
filename = 'flan_t5_results_hateeval.csv'
results = pd.read_csv(filename)
results.columns = ["output", "truth"]
results.describe()

Unnamed: 0,output,truth
count,9983,9983
unique,6,2
top,Hate,Non-Hate
freq,4385,5783


### Qualitative Analysis
First of all, we want to know how many answers were not mapped to either "Hate" or "Non-Hate"

In [3]:
results.output.unique()

array(['Non-hate', 'Hate', 'non-hate', 'hate', 'Non-hat', 'non-hat'],
      dtype=object)

In [4]:
results['isClean'] = results['output'].apply(lambda x: x.lower() == 'hate' or x.lower() == 'non-hate' or x.lower() == 'non-hat' or x.lower() == 'hate speech')

In [5]:
results.head()

Unnamed: 0,output,truth,isClean
0,Non-hate,Hate Speech,True
1,Hate,Hate Speech,True
2,Non-hate,Non-Hate,True
3,Hate,Non-Hate,True
4,Non-hate,Hate Speech,True


In [6]:
results.truth.unique()

array(['Hate Speech', 'Non-Hate'], dtype=object)

In [7]:
results[results.isClean == True].isClean.sum()

9983

### Mapping the answer classes to either 1 or 0

In [8]:
def mapAnswers(answer):
    if answer.lower() == 'non-hate' or answer.lower() == 'non-hat':
        return 0
    elif answer.lower() == 'hate' or answer.lower() == 'hate speech':
        return 1
    else: return None

results['outputLabel'] = results['output'].apply(mapAnswers)
results['truthLabel'] = results['truth'].apply(mapAnswers)

In [9]:
results.isnull().sum()

output         0
truth          0
isClean        0
outputLabel    0
truthLabel     0
dtype: int64

### Model Evaluation using Macro-F1 Score

In [10]:
score = f1_score(results['truthLabel'], results['outputLabel'], average = 'macro')
score

0.745836376819889