In [1]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.metrics import f1_score, classification_report

# Training

### Prompt Preparation

In [5]:
train = pd.read_csv("data/splits/train.csv")
val = pd.read_csv("data/splits/val.csv")
train.head()

Unnamed: 0.1,Unnamed: 0,id,tweet_id,aggression,offense,codemixed,tweet_text
0,6587,169269,1.58569e+18,0,0,1,Let's get some zimbabwe players into ipl and i...
1,6807,178238,1.55508e+18,2,0,0,@user What about millions of undertrials langu...
2,9120,449,1.58029e+18,2,1,1,@user 😂 he has to pay for it .. he burnt gandh...
3,5210,133816,1.58359e+18,0,1,0,@user ratio + mojitos outsold + only men drink...
4,4309,120099,1.54099e+18,0,1,0,"@user In India, ‘right-wing’ BJP govt gave wom..."


Exp Prompt:

Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. ### Instruction: Evaluate this sentence for spelling and grammar mistakes ### Input: He finnished his meal and left the resturant ### Response: There are two spelling errors in the sentence. The corrected sentence should be: "He finished his meal and left the restaurant."

My Prompt:

You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.

`###` Input: <tweet>

`###` Response: Offensive

In [6]:
system_prompt = "You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive."
label_map = {1: "Offensive", 0: "Non-Offensive"}

In [7]:
def prepare_prompt(row):
    # Data Format -- https://huggingface.co/datasets/vicgalle/alpaca-gpt4?row=0
    prompt = system_prompt + "\n\n### Input: " + row["tweet_text"] + "\n\n### Response: " + label_map[row["offense"]]
    return prompt

In [8]:
train["text"] = train.apply(lambda row: prepare_prompt(row), axis=1)
val["text"] = val.apply(lambda row: prepare_prompt(row), axis=1)

train["text"].values[:3]

array(["You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: Let's get some zimbabwe players into ipl and invite them to play series against india in india #zimbabwe #PAKvsZIM #T20worldcup22\n\n### Response: Non-Offensive",
       'You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user What about millions of undertrials languishing in jails for years without hearings. Recently 121 inmates were released after in captive for five years without proof. Are they not normal human beings?\n\n### Response: 

In [9]:
train[["id", "text", "offense"]].to_csv("data/llama_test/train.csv", index=False)
val[["id", "text", "offense"]].to_csv("data/llama_test/val.csv", index=False)

### Train

In [7]:
# MODEL: TinyPixel/Llama-2-7B-bf16-sharded --- Model sharded into 14 smaller models ~ 1gb each
#        abhishek/llama-2-7b-hf-small-shards --- 10 shards
# Max Length can go upto 4096

In [8]:
!autotrain llm --help

usage: autotrain <command> [<args>] llm [-h] [--train] [--deploy]
                                        [--inference] [--data_path DATA_PATH]
                                        [--train_split TRAIN_SPLIT]
                                        [--valid_split VALID_SPLIT]
                                        [--text_column TEXT_COLUMN]
                                        [--rejected_text_column REJECTED_TEXT_COLUMN]
                                        [--prompt-text-column PROMPT_TEXT_COLUMN]
                                        [--model MODEL]
                                        [--model-ref MODEL_REF]
                                        [--learning_rate LEARNING_RATE]
                                        [--num_train_epochs NUM_TRAIN_EPOCHS]
                                        [--train_batch_size TRAIN_BATCH_SIZE]
                                        [--warmup_ratio WARMUP_RATIO]
                                        [--gradient_a

In [18]:
# !pip install flash-attn --no-build-isolation

# FlashAttention-2 currently supports:
# Ampere, Ada, or Hopper GPUs (e.g., A100, RTX 3090, RTX 4090, H100). 
# Support for Turing GPUs (T4, RTX 2080) is coming soon, please use FlashAttention 1.x for Turing GPUs for now.

In [20]:
!autotrain llm --train \
              --project_name "llama-test" \
              --data_path data/llama_test \
              --train_split train \
              --valid_split val \
              --text_column text \
              --model TinyPixel/Llama-2-7B-bf16-sharded \
              --learning_rate 3e-5 \
              --num_train_epochs 5 \
              --train_batch_size 4 \
              --use_peft \
              --use_int4 \
              --lora_r 16 \
              --lora_alpha 32 \
              --lora_dropout 0.05 \
#               --use_flash_attention_2 \
              --trainer sft \
              --model_max_length 512 \
              --block_size 512 > training.log

# --push_to_hub \
# --repo_id sarx11/llama-test \

> [1mINFO    Running LLM[0m
> [1mINFO    Params: Namespace(version=False, train=True, deploy=False, inference=False, data_path='data/llama_test', train_split='train', valid_split='val', text_column='text', rejected_text_column='rejected', prompt_text_column='prompt', model='TinyPixel/Llama-2-7B-bf16-sharded', model_ref=None, learning_rate=3e-05, num_train_epochs=5, train_batch_size=4, warmup_ratio=0.1, gradient_accumulation_steps=1, optimizer='adamw_torch', scheduler='linear', weight_decay=0.0, max_grad_norm=1.0, seed=42, add_eos_token=False, block_size=-1, use_peft=True, lora_r=16, lora_alpha=32, lora_dropout=0.05, logging_steps=-1, project_name='llama-test', evaluation_strategy='epoch', save_total_limit=1, save_strategy='epoch', auto_find_batch_size=False, fp16=False, push_to_hub=False, use_int8=False, model_max_length=1024, repo_id=None, use_int4=True, trainer='default', target_modules=None, merge_adapter=False, token=None, backend='default', username=None, use_flash_attention_2=

 93%|████████████████████████████████████████   | 27/29 [00:43<00:03,  1.67s/it][A
 97%|█████████████████████████████████████████▌ | 28/29 [00:45<00:01,  1.67s/it][A
                                                                                [A
[A{'eval_loss': 1.0947836637496948, 'eval_runtime': 48.521, 'eval_samples_per_second': 2.391, 'eval_steps_per_second': 0.598, 'epoch': 2.0}
 40%|███████████████▏                      | 466/1165 [42:49<1:01:43,  5.30s/it]
100%|███████████████████████████████████████████| 29/29 [00:47<00:00,  1.67s/it][A
{'loss': 1.1204, 'learning_rate': 1.989503816793893e-05, 'epoch': 2.02}         
{'loss': 1.1121, 'learning_rate': 1.9751908396946563e-05, 'epoch': 2.04}        
{'loss': 1.0879, 'learning_rate': 1.9608778625954198e-05, 'epoch': 2.06}        
{'loss': 1.0865, 'learning_rate': 1.9465648854961833e-05, 'epoch': 2.08}        
{'loss': 1.1075, 'learning_rate': 1.9322519083969465e-05, 'epoch': 2.1}         
{'loss': 1.0516, 'learning_rate': 1.9

{'loss': 1.041, 'learning_rate': 1.3311068702290076e-05, 'epoch': 3.0}          
{'loss': 1.0221, 'learning_rate': 1.3167938931297711e-05, 'epoch': 3.03}        
{'loss': 1.129, 'learning_rate': 1.3024809160305345e-05, 'epoch': 3.05}         
{'loss': 1.0891, 'learning_rate': 1.2881679389312978e-05, 'epoch': 3.07}        
{'loss': 1.111, 'learning_rate': 1.2738549618320612e-05, 'epoch': 3.09}         
{'loss': 1.0935, 'learning_rate': 1.2595419847328243e-05, 'epoch': 3.11}        
{'loss': 1.0385, 'learning_rate': 1.2452290076335878e-05, 'epoch': 3.13}        
{'loss': 1.1061, 'learning_rate': 1.2309160305343512e-05, 'epoch': 3.15}        
{'loss': 1.1058, 'learning_rate': 1.2166030534351145e-05, 'epoch': 3.18}        
{'loss': 1.0844, 'learning_rate': 1.2022900763358779e-05, 'epoch': 3.2}         
{'loss': 1.091, 'learning_rate': 1.1879770992366412e-05, 'epoch': 3.22}         
{'loss': 1.1124, 'learning_rate': 1.1736641221374047e-05, 'epoch': 3.24}        
{'loss': 1.1026, 'learning_r

In [15]:
## User CLI Input 
# !autotrain llm --inference \
#               --project_name "llama-test_0"

# Inference

### Load Model

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# checkpoint = "TinyPixel/Llama-2-7B-bf16-sharded"
# tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# model = AutoModelForCausalLM.from_pretrained(checkpoint)

tokenizer = AutoTokenizer.from_pretrained("./llama-test")
tokenizer

LlamaTokenizerFast(name_or_path='./llama-test', vocab_size=32000, model_max_length=1024, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'pad_token': '</s>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),
}

In [4]:
model = AutoModelForCausalLM.from_pretrained("./llama-test/") # Loads checkpoint shards

Loading checkpoint shards: 100%|██████████| 14/14 [01:04<00:00,  4.60s/it]


### Load Data for Inference

In [5]:
import numpy as np
import pandas as pd
from collections import Counter

In [11]:
df = pd.read_csv("data/splits/val.csv") # data/llama_test/val.csv
df.head(2)

Unnamed: 0.1,Unnamed: 0,id,tweet_id,aggression,offense,codemixed,tweet_text
0,7624,192426,1.58585e+18,2,1,0,@user Congress is not a political party.. It i...
1,5663,159896,1.58115e+18,2,1,1,@user लगता है सरकार कोई है ही नही...इसपे UAPA ...


In [6]:
system_prompt = "You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive."
label_map = {1: "Offensive", 0: "Non-Offensive"}

In [7]:
def prepare_prompt_val(row):
    prompt = system_prompt + "\n\n### Input: " + row["tweet_text"] + "\n\n### Response: "
    return prompt

In [14]:
df["text"] = df.apply(lambda row: prepare_prompt_val(row), axis=1)
df["text"].values[:2]

array(['You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user Congress is not a political party.. It is a INC Pvt. Ltd. made by royal Gandhi family for loot people and build new scams.. @user @user @user \n\n@user @user\n\n### Response: ',
       'You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user लगता है सरकार कोई है ही नही...इसपे UAPA लगना चाहिए और साथ ही इसके घर पे बुलडोझर चलना चाहिए..\n\n### Response: '],
      dtype=object)

### Test on 1 sample
https://huggingface.co/docs/transformers/main/model_doc/llama#transformers.LlamaForCausalLM

In [8]:
import torch
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader

device = torch.device("cuda") if torch.cuda.is_available() else "cpu"
device

device(type='cuda')

In [9]:
model = model.to(device)
model.eval()

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(
            in_features=4096, out_features=4096, bias=False
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0.05, inplace=False)
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
          )
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(
            in_features=4096, out_features=4096, bias=False
            (lora_dropout): ModuleDict(
              (default): Dropout(p=0

In [103]:
inputs = tokenizer(df["text"][2], padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
inputs.input_ids.shape

torch.Size([1, 158])

In [104]:
with torch.no_grad():
    generate_ids = model.generate(inputs.input_ids, max_length=300)
    
tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)

['You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: Two Sadhus beaten in Chhatishgarh  suspecting they are childlifters. Its a sad thing that Hindu seers are beaten up this way under false charges the moment they try to stop conversion.\nSwami Laxmananand Saraswati was killed in Odisha because he got Ghar wapasi done in thousands\n\n### Response:  Non-Offensive You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user @user @user @user @user @user @user @user @user @user @user @user @user @user @use

### Test on batched samples
https://huggingface.co/docs/transformers/main/model_doc/llama#transformers.LlamaForCausalLM

In [132]:
inputs = tokenizer(df["text"][:4].tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt").to(device)
inputs.input_ids.shape

torch.Size([4, 188])

In [138]:
with torch.no_grad():
    generate_ids = model.generate(inputs.input_ids, max_length=100)
    
responses = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
responses
# [response.split("### Response: ")[1] for response in responses]

['You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user Congress is not a political party.. It is a INC Pvt. Ltd. made by royal Gandhi family for loot people and build new scams.. @user @user @user \n\n@user @user\n\n### Response: ',
 'You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user लगता है सरकार कोई है ही नही...इसपे UAPA लगना चाहिए और साथ ही इसके घर पे बुलडोझर चलना चाहिए..\n\n### Response:  Off',
 'You are an expert in hate speech detection. Offensive tweets are defined as tweets contai

## Evaluation

### Val Testing

#### Batched

In [73]:
class HSDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        inputs = {key: val[idx] for key, val in self.encodings.items()}
        labels = torch.tensor(self.labels[idx])
        return inputs, labels

    def __len__(self):
        return len(self.labels)

In [74]:
tokenized_tweets = tokenizer(df['text'].tolist(), padding=True, truncation=True, max_length=512, return_tensors="pt")
tokenized_tweets = tokenized_tweets.to(device)

In [75]:
val_dataset = HSDataset(tokenized_tweets, df['offense'].tolist())
val_dataloader = DataLoader(val_dataset, batch_size=4)

In [83]:
model.eval()
responses = []


for i, (inputs, labels) in tqdm(enumerate(val_dataloader)):
    with torch.no_grad():
        generate_ids = model.generate(inputs["input_ids"], max_length=300)
    
    response = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, 
                                      clean_up_tokenization_spaces=False)
    responses = responses + response
    #if i == 2:
        #break

0it [00:00, ?it/s]

tensor([1, 1, 1, 0])


1it [00:01,  1.73s/it]

tensor([1, 0, 1, 0])


2it [00:03,  1.71s/it]

tensor([0, 1, 1, 0])


2it [00:05,  2.57s/it]


In [85]:
responses

['You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user Congress is not a political party.. It is a INC Pvt. Ltd. made by royal Gandhi family for loot people and build new scams.. @user @user @user \n\n@user @user\n\n### Response: ',
 'You are an expert in hate speech detection. Offensive tweets are defined as tweets containing profane words, sarcastic remarks, insults, slanders or slurs. These can have a potentially harmful effect on a given target. Classify the following input tweet as Offensive or Non-Offensive.\n\n### Input: @user लगता है सरकार कोई है ही नही...इसपे UAPA लगना चाहिए और साथ ही इसके घर पे बुलडोझर चलना चाहिए..\n\n### Response: \n',
 'You are an expert in hate speech detection. Offensive tweets are defined as tweets containi

#### 1 at a time

In [56]:
lens = df['text'].apply(lambda x: len(x.split())) * 2
lens.min(), lens.mean(), lens.max()

(96, 153.4906103286385, 228)

In [108]:
model.eval()
responses = []

for i in tqdm(range(len(df))):
    inputs = tokenizer(df["text"][i], padding=True, truncation=True, max_length=256, 
                       return_tensors="pt").to(device)
    with torch.no_grad():
        generate_ids = model.generate(inputs.input_ids, max_length=256)
        
    response = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, 
                                      clean_up_tokenization_spaces=False)[0]
    responses.append(response)

100%|██████████| 852/852 [1:40:08<00:00,  7.05s/it]


In [109]:
len(responses)

852

In [13]:
def get_labels(responses):
    labels = []
    response_trimmed = 0
    label_absent = 0

    for response in responses:
        splitted = response.split("### Response: ")
        if len(splitted) == 1:
            #print(response, "\n")
            response_trimmed += 1
            label = 0 #-1
            
        else:
            if "Non-Offensive" in splitted[1][:15]:
                label = 0
            elif "Offensive" in splitted[1][:15]:
                label = 1
            else:
                label_absent += 1
                label = 0 # Default majority class
                
        labels.append(label)

    print(f"{response_trimmed} responses trimmed due to max_length")
    print(f"{label_absent} labels absent \n")
    return labels

In [141]:
print(Counter(labels))
print(df["offense"].value_counts())

Counter({0: 738, 1: 114})
offense
0    596
1    256
Name: count, dtype: int64


#### Metrics

In [126]:
f1_score(df['offense'].tolist(), labels)

0.3837837837837838

In [130]:
print(classification_report(df['offense'].tolist(), labels, target_names=["Non-Offensive (0)", "Offensive (1)"]))

                   precision    recall  f1-score   support

Non-Offensive (0)       0.75      0.93      0.83       596
    Offensive (1)       0.62      0.28      0.38       256

         accuracy                           0.73       852
        macro avg       0.69      0.60      0.61       852
     weighted avg       0.71      0.73      0.70       852



## Test Data

In [10]:
test_df = pd.read_csv("data/splits/test.csv")
test_df["text"] = test_df.apply(lambda row: prepare_prompt_val(row), axis=1)
test_df.head(2)

Unnamed: 0.1,Unnamed: 0,id,tweet_id,aggression,offense,codemixed,tweet_text,text
0,7683,192521,1.58584e+18,2,1,1,@user Can we send your beloved hero Pappu Gand...,You are an expert in hate speech detection. Of...
1,7121,181769,1.16264e+18,1,0,1,@user @user @user #HumanRights priorities are ...,You are an expert in hate speech detection. Of...


In [11]:
model.eval()
responses = []

for i in tqdm(range(len(test_df))):
    inputs = tokenizer(test_df["text"][i], padding=True, truncation=True, max_length=300, 
                       return_tensors="pt").to(device)
    with torch.no_grad():
        generate_ids = model.generate(inputs.input_ids, max_length=300)
        
    response = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, 
                                      clean_up_tokenization_spaces=False)[0]
    responses.append(response)

100%|██████████| 851/851 [2:16:27<00:00,  9.62s/it]  


In [14]:
labels = get_labels(responses)

8 responses trimmed due to max_length
3 labels absent 



In [15]:
import pickle

In [16]:
with open('data/predictions/llama-ft-clm_test.pickle', 'wb') as f:
    pickle.dump(labels, f, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
Counter(labels)

Counter({0: 744, 1: 107})

In [18]:
f1_score(test_df['offense'].tolist(), labels)

0.4132231404958678

In [19]:
print(classification_report(test_df['offense'].tolist(), labels, 
                            target_names=["Non-Offensive (0)", "Offensive (1)"], digits=4))

                   precision    recall  f1-score   support

Non-Offensive (0)     0.7567    0.9462    0.8409       595
    Offensive (1)     0.7009    0.2930    0.4132       256

         accuracy                         0.7497       851
        macro avg     0.7288    0.6196    0.6271       851
     weighted avg     0.7399    0.7497    0.7123       851



In [20]:
sum = 0
for i in range(1,12):
    sum += 1/i
sum

3.0198773448773446

In [21]:
0.1/3

0.03333333333333333