# Notebook to fine-tune Mistral 7B for sarcasm detection

## Install necessary libraries

In [None]:
# Hugging Face stack, each upgraded to the latest available release (no version pins).
!pip install -q -U transformers
!pip install -q -U accelerate
!pip install -q -U bitsandbytes
!pip install -q -U datasets scipy ipywidgets
# PEFT is installed from the GitHub repository rather than from PyPI.
!pip install -q -U git+https://github.com/huggingface/peft.git
# NOTE(review): bitsandbytes and peft were already installed by the lines above;
# trl is the only package this line adds.
!pip install -q bitsandbytes trl peft

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.6/297.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m9.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m38.6/38.6 MB[0m [31m41.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.4/139.4 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━

## Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
import io
from datasets import Dataset
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
from peft import AutoPeftModelForCausalLM,PeftConfig
import transformers
from datetime import datetime
from trl import SFTTrainer
from tqdm import tqdm

In [None]:
# Mount Google Drive; the dataset, checkpoint and output paths used below all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load Dataset

In [None]:
# key.csv is read as a tab-separated file; only its header row is used, as the
# column names for the header-less train/test files loaded below.
column_names_df  = pd.read_csv('/content/drive/MyDrive/Mistral_finetune/Datasets/SarcasmDetection/key.csv',on_bad_lines='skip',encoding='utf-8',sep='\t')
column_names_list = column_names_df.columns
#print(column_names_list)

In [None]:
# Training split: tab-separated, no header row. Malformed lines are skipped silently
# (on_bad_lines='skip'), so the row count may be lower than the file's line count.
train_pd_full = pd.read_csv('/content/drive/MyDrive/Mistral_finetune/Datasets/SarcasmDetection/train-balanced.csv',on_bad_lines='skip',header=None,encoding='utf-8',sep='\t' )
train_pd_full.columns = column_names_list
#train_pd_full

In [None]:
# Test split: same format and same column names as the training split.
# Malformed lines are skipped silently (on_bad_lines='skip').
test_pd_full = pd.read_csv('/content/drive/MyDrive/Mistral_finetune/Datasets/SarcasmDetection/test-balanced.csv',on_bad_lines='skip',header=None,encoding='utf-8',sep='\t' )
test_pd_full.columns = column_names_list
#test_pd_full

## Create Data subset for faster training and evaluation

In [None]:
def create_data_subset(df, sample_size, random_state=None):
  """Draw a class-balanced, shuffled subset of a labelled dataframe.

  Args:
    df: DataFrame with a 'label' column holding the values 0 and 1.
    sample_size: Number of rows to draw for EACH label, so the result has
      2 * sample_size rows.
    random_state: Optional seed forwarded to ``DataFrame.sample`` for both the
      per-label draw and the final shuffle. Pass an int to get the same subset
      on every run; the default (None) keeps the previous unseeded behaviour.

  Returns:
    A new DataFrame with sample_size rows per label, shuffled, with a fresh
    0..n-1 index.

  Raises:
    ValueError: If either label has fewer than sample_size rows.
  """
  per_label_samples = []
  for label_value in (0, 1):
    label_rows = df[df['label'] == label_value]
    # Fail with a message that names the label instead of pandas' generic error.
    if len(label_rows) < sample_size:
      raise ValueError(
          f"label {label_value} has only {len(label_rows)} rows; "
          f"cannot sample {sample_size}"
      )
    per_label_samples.append(label_rows.sample(sample_size, random_state=random_state))

  # Combine the two halves, then shuffle so the labels are interleaved.
  balanced_subset = pd.concat(per_label_samples)
  return balanced_subset.sample(frac=1, random_state=random_state).reset_index(drop=True)


In [None]:
# Report the (rows, columns) shape of the full splits before sub-sampling.
print('Actual data size of the full train dataframe',train_pd_full.shape)
print('Actual data size of the full test dataframe',test_pd_full.shape)

Actual data size of the full train dataframe (56962, 10)
Actual data size of the full test dataframe (14452, 10)


In [None]:
## Number of rows for each label
# The subset keeps only the three columns the prompts use and draws
# train_sample_size rows per label. No seed is passed to create_data_subset here,
# so the selected rows differ between runs.
train_sample_size = 500
train_data_subset = train_pd_full[['label','comment','parent_comment']]
train_data_subset=create_data_subset(train_data_subset,train_sample_size)
print('Data size of the subset train dataframe',train_data_subset.shape)
print('Rows with label:1 ',train_data_subset[train_data_subset['label']==1].shape)
print('Rows with label:0',train_data_subset[train_data_subset['label']==0].shape)

Data size of the subset train dataframe (1000, 3)
Rows with label:1  (500, 3)
Rows with label:0 (500, 3)


In [None]:
## Number of rows for each label
# Balanced evaluation subset: test_sample_size rows per label, restricted to the
# three columns the prompts use.
test_sample_size = 50
test_data_subset = test_pd_full[['label','comment','parent_comment']]
test_data_subset=create_data_subset(test_data_subset,test_sample_size)
# The message previously said "train" although it reports the test subset.
print('Data size of the subset test dataframe',test_data_subset.shape)
print('Rows with label:1 ',test_data_subset[test_data_subset['label']==1].shape)
print('Rows with label:0',test_data_subset[test_data_subset['label']==0].shape)

Data size of the subset train dataframe (100, 3)
Rows with label:1  (50, 3)
Rows with label:0 (50, 3)


### Convert pandas dataframe to a dataset

In [None]:
# Wrap the pandas subsets in Hugging Face Dataset objects; the prompt builders
# and the trainer below index and map over these.
train_dataset = Dataset.from_pandas(train_data_subset)
test_dataset = Dataset.from_pandas(test_data_subset)
print(train_dataset)
print(test_dataset)

Dataset({
    features: ['label', 'comment', 'parent_comment'],
    num_rows: 1000
})
Dataset({
    features: ['label', 'comment', 'parent_comment'],
    num_rows: 100
})


## LOAD BASE MODEL AND CONFIGs

In [None]:
## Base model - Pretrained LLM you want to eventually finetune
# Hugging Face model id; also used below to load the matching tokenizer.
base_model_name = 'mistralai/Mistral-7B-v0.1'

## Quantization Config
# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
## Load the pretrained model with the quantization config applied at load time
model = AutoModelForCausalLM.from_pretrained(base_model_name, quantization_config=bnb_config)



config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now set to True since model is quantized.


model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

## TOKENIZATION
### Tokenize the input data along with prompt

In [None]:
## set up tokenizer parameters

# NOTE(review): add_eos_token=True is requested for every encoding, so prompts
# encoded for generation presumably also end in EOS. Because pad_token is set to
# eos_token, that would explain the "right-padding was detected" warnings in the
# evaluation output further down - confirm.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    model_max_length=512,  ## Max length of input to the model (tunable)
    padding_side="left",
    add_eos_token=True)
# The tokenizer is given the EOS token as its padding token.
tokenizer.pad_token = tokenizer.eos_token

def tokenize(prompt):
    """Encode a prompt to a fixed length of 512 tokens and attach causal-LM labels.

    The prompt is truncated and padded to exactly 512 tokens with the
    module-level ``tokenizer``. ``labels`` is a copy of ``input_ids``, so the
    padded positions are part of the labels as well.
    """
    encoding_options = {
        "truncation": True,
        "max_length": 512,  # Max length of input to the model (tunable)
        "padding": "max_length",
    }
    encoded = tokenizer(prompt, **encoding_options)
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

## PROMPT GENERATION

### Train Prompt

In [None]:
# Indexed by the integer dataset label when building training prompts:
# 0 -> "Not Sarcasm", 1 -> "Sarcasm".
label_to_text = ["Not Sarcasm", "Sarcasm"]

In [None]:
def generate_train_prompt(data_point):
    """Build the supervised fine-tuning prompt for one dataset row.

    The prompt contains the task instruction, two worked examples, the row's
    parent comment and reply between <<< and >>>, and finally the gold answer
    on the "Category:" line.

    Args:
        data_point: Mapping with "parent_comment", "comment" and an integer
            "label" that indexes the module-level label_to_text list.

    Returns:
        The full prompt string, including the expected category.

    Note:
        The answer line here is spelled "Category:", while generate_eval_prompt
        ends with "Catgeory:". The two prompts therefore differ on that line.
    """
    full_prompt =f"""You are a sarcasm detection bot for social media posts. Your task is to assess the comment and its reply and categorize the reply in context of the comment after <<< >>> into one of the following predefined categories:
    Sarcasm
    Not Sarcasm

    ####
    Here are some examples:
    comment: Most of the Bernie people got what they wanted on the platform, or most of it.
    reply: dont trigger them with facts!
    Category: Sarcasm

    comment: The Dallas Morning News: What you need to know about the enemies of the American people the president warned you about
    reply: What a brilliant and well written article that was.
    Category: Not Sarcasm

    If the text doesn't fit into any of the above categories, classify it as:
    Not Sarcasm
    <<<
    comment:
    {data_point["parent_comment"]}
    reply:
    {data_point["comment"]}
    >>>
    Category: {label_to_text[data_point["label"]]}
    """
    return full_prompt

### PROMPT ENGINEERING
#### Evaluating the performance of a few-shot prompt on test data with the pretrained base model


#### Evaluation Prompt

In [None]:
## ith example
# Print one training row (index i) to sanity-check the three columns.
i=1#72
print("Parent comment: " + train_dataset[i]['parent_comment'])
print("Child comment: " + train_dataset[i]['comment'])
print("Label: " + str(train_dataset[i]['label']) + "\n")

Parent comment: next speaker, please
Child comment: Yea, shouldn't this rando black guy be in jail?
Label: 1



In [None]:
def generate_eval_prompt(data_point):
    """Build the inference prompt for one dataset row.

    Same instruction and worked examples as the training prompt, but the final
    answer line is left empty for the model to complete.

    Args:
        data_point: Mapping with "parent_comment" and "comment".

    Returns:
        The prompt string, ending with the open answer line.

    Note:
        The answer line is spelled "Catgeory:" (sic), whereas
        generate_train_prompt uses "Category:". The output parsers in this
        notebook search for the exact text ">>>", newline, four spaces,
        "Catgeory:", so the spelling here and in the parsers must be changed
        together.
    """
    full_prompt =f"""You are a sarcasm detection bot for social media posts. Your task is to assess the comment and its reply and categorize the reply in context of the comment after <<< >>> into one of the following predefined categories:
    Sarcasm
    Not Sarcasm

    ####
    Here are some examples:
    comment: Most of the Bernie people got what they wanted on the platform, or most of it.
    reply: dont trigger them with facts!
    Category: Sarcasm

    comment: The Dallas Morning News: What you need to know about the enemies of the American people the president warned you about
    reply: What a brilliant and well written article that was.
    Category: Not Sarcasm

    If the text doesn't fit into any of the above categories, classify it as:
    Not Sarcasm
    <<<
    comment:
    {data_point["parent_comment"]}
    reply:
    {data_point["comment"]}
    >>>
    Catgeory:
    """
    return full_prompt

#### Testing

In [None]:
## ith example
# Print one test row; the same index i is reused by the single-prompt generation cell below.
i=40
print("Parent comment: " + test_dataset[i]['parent_comment'])
print("Child comment: " + test_dataset[i]['comment'])
print("Label: " + str(test_dataset[i]['label']) + "\n")

Parent comment: They detest political correctness; just don't call them NAZIS
Child comment: hey, that might hurt their feelings!
Label: 1



In [None]:
# Generate a completion for the single test example selected above (index i).
eval_prompt = generate_eval_prompt(test_dataset[i])
model_input = tokenizer(eval_prompt, return_tensors="pt")
# The tokenizer is created with add_eos_token=True, so the encoded prompt ends
# with EOS. Feeding that to generate() asks the model to continue after
# end-of-sequence and triggers the "right-padding was detected" warning
# (pad == EOS). Drop the trailing EOS so the model continues the prompt itself.
if model_input["input_ids"][0, -1].item() == tokenizer.eos_token_id:
    model_input = {name: tensor[:, :-1] for name, tensor in model_input.items()}
model_input = {name: tensor.to("cuda") for name, tensor in model_input.items()}
model.eval()
with torch.no_grad():
    generated = model.generate(
        **model_input,
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,  # was the literal 2
    )
    print(tokenizer.decode(generated[0], skip_special_tokens=True))

#### Tokenize the prompts

In [None]:
def generate_and_tokenize_train_prompt(data_point):
  """Wrap the training prompt for one row in a {'text': ...} record.

  Despite the name, no tokenization happens here; only the prompt string is built.
  """
  prompt_text = generate_train_prompt(data_point)
  return {'text': prompt_text}

def generate_and_tokenize_eval_prompt(data_point):
  """Wrap the evaluation prompt for one row in a {'text': ...} record.

  Despite the name, no tokenization happens here; only the prompt string is built.
  """
  prompt_text = generate_eval_prompt(data_point)
  return {'text': prompt_text}

## EVALUATION LOOP - FEW SHOT PROMPT ENGINEERING ON BASE MODEL

In [None]:
# Run the base model over every test row and collect the decoded output
# (prompt + continuation) in basemodel_results_df['model_raw_op'].
n= len(test_dataset)
print(n)
model.eval()  # loop-invariant, so set once instead of on every iteration
raw_outputs = []
for i in tqdm(range(n)):
  eval_prompt = generate_eval_prompt(test_dataset[i])
  model_input = tokenizer(eval_prompt, return_tensors="pt")
  # The tokenizer is created with add_eos_token=True, so the encoded prompt ends
  # with EOS. That makes the model continue after end-of-sequence and triggers
  # the "right-padding was detected" warning (pad == EOS). Drop it.
  if model_input["input_ids"][0, -1].item() == tokenizer.eos_token_id:
    model_input = {name: tensor[:, :-1] for name, tensor in model_input.items()}
  model_input = {name: tensor.to("cuda") for name, tensor in model_input.items()}
  with torch.no_grad():
    # Only the first word after the answer marker is parsed downstream, so
    # max_new_tokens could be lowered considerably to speed this loop up.
    generated = model.generate(
        **model_input,
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,  # was the literal 2
    )
  raw_outputs.append(tokenizer.decode(generated[0], skip_special_tokens=True))
# Build the frame once instead of growing it row by row with .loc.
basemodel_results_df = pd.DataFrame({'model_raw_op': raw_outputs})

100


  0%|          | 0/100 [00:00<?, ?it/s]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  1%|          | 1/100 [00:21<35:48, 21.70s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  2%|▏         | 2/100 [00:42<34:39, 21.22s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 3/100 [01:03<34:02, 21.06s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  4%|▍         | 4/100 [01:24<33:31, 20.95s/it]A decoder-only architecture is being used, but right-padding was detected! For co

In [None]:
# Attach the raw generations to a pandas copy of the test rows; the column
# assignment aligns on the row index.
basemodel_results_raw = test_dataset.to_pandas()
basemodel_results_raw['model_raw_op'] = basemodel_results_df['model_raw_op']

In [None]:
import os
# Persist the raw base-model generations to Drive as a tab-separated file.
raw_output_path = "/content/drive/MyDrive/Mistral_finetune/output/basemodel/"
csv_filename = 'raw_basemodel_results.csv'
full_csv_path = os.path.join(raw_output_path, csv_filename)

# to_csv does not create missing parent directories, so make sure the folder exists.
os.makedirs(raw_output_path, exist_ok=True)
basemodel_results_raw.to_csv(full_csv_path, sep='\t', index=False)

#### Read basemodel_results_raw from folder location if not in session

In [None]:
# Display rows 10-19 of the base-model results.
basemodel_results_raw[10:20]

Unnamed: 0,label,comment,parent_comment,model_raw_op
10,0,American frogs have toad the line for the gay ...,Ribbit for his pleasure,You are a sarcasm detection bot for social med...
11,0,Anyone got a bunker they'd be willing to share?,Donald Trump to be handed nuclear codes despit...,You are a sarcasm detection bot for social med...
12,0,Revolt.,Wealthy Would Get Billions in Tax Cuts Under O...,You are a sarcasm detection bot for social med...
13,0,I'm sure his best friend is black.,Mississippi lawmaker under fire for claiming '...,You are a sarcasm detection bot for social med...
14,0,He's off to a great start considering his visi...,Basically saying let's reduce poverty by slash...,You are a sarcasm detection bot for social med...
15,0,Jesus Christ SCOTUS is made up of fossils.,The next President is not going to appoint a C...,You are a sarcasm detection bot for social med...
16,0,"lol, can't answer question, gets triggered, st...","Keep copy pasting the same shit, I'm sure it h...",You are a sarcasm detection bot for social med...
17,1,You're just racist,It's also worth noting that Republicans won bi...,You are a sarcasm detection bot for social med...
18,1,"Yeah, we could all really come together if a p...",Good both parties should be smashed up into ti...,You are a sarcasm detection bot for social med...
19,1,If the government would just get out the way a...,But the free market will solve those money iss...,You are a sarcasm detection bot for social med...


In [None]:
# Print the full raw generation for row 13; it is reused below to try out the parser.
test_ip = basemodel_results_raw['model_raw_op'][13]
print(test_ip)

You are a sarcasm detection bot for social media posts. Your task is to assess the comment and its reply and categorize the reply in context of the comment after <<< >>> into one of the following predefined categories:
    Sarcasm 
    Not Sarcasm

    #### 
    Here are some examples:
    comment: Most of the Bernie people got what they wanted on the platform, or most of it.
    reply: dont trigger them with facts!
    Category: Sarcasm

    comment: The Dallas Morning News: What you need to know about the enemies of the American people the president warned you about
    reply: What a brilliant and well written article that was.
    Category: Not Sarcasm

    If the text doesn't fit into any of the above categories, classify it as:
    Not Sarcasm
    <<<
    comment:
    Mississippi lawmaker under fire for claiming 'all the blacks' in his town get food stamps, don't work
    reply: 
    I'm sure his best friend is black.
    >>>
    Catgeory: 
    
    Not Sarcasm

    <<<
    commen

### Preprocessing function to retrieve output category from LLM output

In [None]:
def find_first_word_basemodel(text):
    """Extract the predicted category from a raw base-model generation.

    Locates the first answer marker (">>>", newline, four spaces, then
    "Catgeory:" as emitted by the evaluation prompt, or the correctly spelled
    "Category:") and inspects the first whitespace-delimited word after it.

    Args:
        text: Decoded model output (prompt plus continuation). Non-string
            values, such as NaN cells from a reloaded CSV, are tolerated.

    Returns:
        'Sarcasm' or 'Not Sarcasm' when the first word is 'Sarcasm' or 'Not';
        the string 'None' when the first word is anything else;
        None when text is not a string, no marker is present, or nothing
        follows the marker.
    """
    if not isinstance(text, str):
        return None

    # Accept both spellings so the parser keeps working if the prompt typo is fixed.
    markers = (">>>\n    Catgeory:", ">>>\n    Category:")
    hits = [(text.find(marker), len(marker)) for marker in markers]
    hits = [(position, length) for position, length in hits if position != -1]
    if not hits:
        return None  # Marker not found

    # Earliest marker wins: it is the one belonging to the prompt itself.
    start_index, marker_length = min(hits)
    # split() already discards leading whitespace and newlines.
    words = text[start_index + marker_length:].split()
    if not words:
        return None  # No word found

    if words[0] == 'Sarcasm':
        return 'Sarcasm'
    if words[0] == 'Not':
        return 'Not Sarcasm'
    return 'None'

In [None]:
# Example usage: parse the raw generation printed above.
result = find_first_word_basemodel(test_ip)
print('The output is --->',result)  # one of 'Sarcasm', 'Not Sarcasm', 'None' or None

The output is ---> Not Sarcasm


In [None]:
# Parse every raw generation into a category string (or None when parsing fails).
basemodel_results_raw["predicted_category"] = basemodel_results_raw["model_raw_op"].apply(find_first_word_basemodel)

In [None]:
def text_to_binary(text):
  """Map a parsed category to its numeric class.

  Returns 1 for 'Sarcasm', 0 for 'Not Sarcasm' and 2 for any other value
  (including None), so unparsed outputs form their own class.
  """
  if text=='Not Sarcasm':
    return 0
  return 1 if text=='Sarcasm' else 2

In [None]:
# Convert the category strings to numeric classes for the metrics below.
basemodel_results_raw["predicted_category_bn"] =basemodel_results_raw["predicted_category"].apply(text_to_binary)

In [None]:
# Display the results frame with the parsed and numeric prediction columns.
basemodel_results_raw

Unnamed: 0,label,comment,parent_comment,model_raw_op,predicted_category,predicted_category_bn
0,1,Carson: The black guy my preacher said we were...,Dr. Ben Carson surges into 2nd place in Iowa P...,You are a sarcasm detection bot for social med...,Sarcasm,1
1,1,"Yeah, a rich, white WASPy *wo*man is totally d...",Ever since her listening tour in 1999 where sh...,You are a sarcasm detection bot for social med...,Sarcasm,1
2,1,"Yeah, this study was obviously a plot to distr...",Don't forget BENGHAZI!!!!,You are a sarcasm detection bot for social med...,Sarcasm,1
3,0,Because only sith deal in absolutes.,"Why is opposing Trump's agenda ""America hating""?",You are a sarcasm detection bot for social med...,Sarcasm,1
4,0,"I'd like to read about that, is there a link?",Yeah that's sad. They're wrong about that. But...,You are a sarcasm detection bot for social med...,Sarcasm,1
...,...,...,...,...,...,...
95,1,"Yes, It's a giant conspiracy to take 3 delegat...",The mainstream media is lying about this whole...,You are a sarcasm detection bot for social med...,,2
96,1,"No, when these tribes conquered lands they ass...",Did any of these tribes ethnically cleanse and...,You are a sarcasm detection bot for social med...,Sarcasm,1
97,1,You're right we should be terrified.,"Keep that sense of righteousness strong, you'l...",You are a sarcasm detection bot for social med...,Sarcasm,1
98,0,Because it's an Internet bet,Only $10? Why not $100 or $500?,You are a sarcasm detection bot for social med...,Not Sarcasm,0


In [None]:
# Count predictions per numeric class (see text_to_binary for the mapping).
basemodel_results_raw["predicted_category_bn"].value_counts()

predicted_category_bn
1    69
2    19
0    12
Name: count, dtype: int64

### Evaluation metrics on base model results

In [None]:
import os
# Persist the base-model results, including parsed predictions, to Drive.
raw_output_path = "/content/drive/MyDrive/Mistral_finetune/output/basemodel/"
csv_filename = 'processed_basemodel_results.csv'
full_csv_path = os.path.join(raw_output_path, csv_filename)

# to_csv does not create missing parent directories, so make sure the folder exists.
os.makedirs(raw_output_path, exist_ok=True)
basemodel_results_raw.to_csv(full_csv_path, sep='\t', index=False)

In [None]:
from sklearn.metrics import classification_report

# Ground truth labels (0/1) and predicted classes for the base model.
y_true = basemodel_results_raw['label']
y_pred = basemodel_results_raw['predicted_category_bn']

# A predicted class can have no true samples (e.g. an "unparsed" class), which
# leaves its recall undefined. zero_division=0 reports those metrics as 0, the
# same value the default produces, without the UndefinedMetricWarning output.
report = classification_report(y_true, y_pred, zero_division=0)

print(report)

              precision    recall  f1-score   support

           0       0.67      0.16      0.26        50
           1       0.54      0.74      0.62        50
           2       0.00      0.00      0.00         0

    accuracy                           0.45       100
   macro avg       0.40      0.30      0.29       100
weighted avg       0.60      0.45      0.44       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## FINETUNE SECTION

In [None]:
# Add a 'text' column holding the full training prompt for each row, then drop
# the source columns. Despite the variable name, nothing is tokenized here; the
# trainer is later pointed at the 'text' field.
tokenized_train_dataset = train_dataset.map(generate_and_tokenize_train_prompt)
tokenized_train_dataset=tokenized_train_dataset.remove_columns(['label','comment','parent_comment'])

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [None]:
# Directory on Drive where the trainer writes checkpoints. It was previously
# only defined in the later checkpoint-loading cell, so this cell raised a
# NameError on a fresh kernel. The value matches the path that cell builds
# (note there is no '/' between "finetuned_models" and the run name).
output_dir = "/content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1"

# based on config
training_args = transformers.TrainingArguments(
    fp16=False, # specify bf16=True instead when training on GPUs that support bf16
    do_eval=False,
    bf16=False,
    optim="paged_adamw_8bit",
    #evaluation_strategy="epoch",
    gradient_accumulation_steps=8,
    #gradient_checkpointing=True,
    #gradient_checkpointing_kwargs={"use_reentrant": False},
    learning_rate=2.0e-05,
    log_level="info",
    weight_decay=0.001,
    logging_steps=10,
    logging_strategy="steps",
    lr_scheduler_type="constant",
    # max_steps=1000000,
    num_train_epochs=4,
    output_dir=output_dir,
    overwrite_output_dir=True,
    per_device_eval_batch_size=1, # originally set to 8
    per_device_train_batch_size=1, # originally set to 8
    # push_to_hub=True,
    # hub_model_id="zephyr-7b-sft-lora",
    # hub_strategy="every_save",
    # report_to="tensorboard",
    # Save one checkpoint per epoch. With 1,000 examples, batch size 1 and
    # 8 accumulation steps there are 125 optimizer steps per epoch (500 in total
    # on a single GPU), so the previous save_steps=1000 never wrote a checkpoint.
    save_strategy="epoch",
    seed=42,
    # NOTE(review): warmup is ignored by the "constant" scheduler; switch
    # lr_scheduler_type to "constant_with_warmup" if warmup is intended.
    warmup_ratio=0.3
)

# LoRA adapter configuration: rank-8 adapters on the attention and MLP
# projections plus the output head.
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)


# Supervised fine-tuning on the 'text' column (one full prompt per row, no packing).
trainer = SFTTrainer(
    model=model,
    #model_init_kwargs=model_kwargs,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    #eval_dataset=eval_dataset,
    dataset_text_field="text",
    tokenizer=tokenizer,
    packing=False,
    peft_config=config,
    max_seq_length=512
)
trainer.train()

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
***** Running training *****
  Num examples = 1,000
  Num Epochs = 4,000
  Instantaneous batch size per device = 1
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 4
  Total optimization steps = 1,000,000
  Number of trainable parameters = 21,260,288


Step,Training Loss
10,2.0852
20,1.4251
30,0.7876
40,0.5066
50,0.4971
60,0.5075
70,0.4517
80,0.5539
90,0.534
100,0.4968


Saving model checkpoint to /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-250
tokenizer config file saved in /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-250/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-250/special_tokens_map.json
Saving model checkpoint to /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-500
tokenizer config file saved in /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-500/tokenizer_config.json
Special tokens file saved in /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-500/special_tokens_map.json
Saving model checkpoint to /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-750
tokenizer config file saved in /content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-

Step,Training Loss
10,2.0852
20,1.4251
30,0.7876
40,0.5066
50,0.4971
60,0.5075
70,0.4517
80,0.5539
90,0.534
100,0.4968


KeyboardInterrupt: 

## LOAD FINETUNED MODEL CHECKPOINTS

#### Checkpoint after 2nd epoch

In [None]:
## Fine-tuned adapter checkpoint to evaluate
project = "run_1"
# Kept in its own variable: this cell used to overwrite base_model_name, which
# holds the Hugging Face model id used by the model and tokenizer cells, so
# re-running those cells afterwards failed.
run_prefix = "mistral"
run_name = run_prefix + "-" + project
# NOTE(review): must name a checkpoint the training run actually wrote - confirm.
checkpoint = 'checkpoint-2000'#'checkpoint-1000'
# There is no '/' before run_name, so the directory is
# ".../finetuned_modelsmistral-run_1". Left unchanged because the training log
# shows checkpoints were saved under exactly that path.
output_dir = "/content/drive/MyDrive/Mistral_finetune/finetuned_models" + run_name

peft_model_path_1 = output_dir+'/'+checkpoint
print(peft_model_path_1)
# Loads the adapter from the checkpoint path with the same 4-bit quantization config.
ft_model_ep1 = AutoPeftModelForCausalLM.from_pretrained(peft_model_path_1,quantization_config=bnb_config)

/content/drive/MyDrive/Mistral_finetune/finetuned_modelsmistral-run_1/checkpoint-2000


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# eval_prompt = generate_eval_prompt(test_dataset[i])
# model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
# ft_model_ep1.eval()
# with torch.no_grad():
#     print(tokenizer.decode(ft_model_ep1.generate(**model_input, max_new_tokens=256, pad_token_id=2)[0], skip_special_tokens=True))

## EVALUATION LOOP

In [None]:
from tqdm import tqdm

### Write the evaluation result in a new column of test dataset along with existing columns - label, comment and parent_comment

In [None]:
# Run the fine-tuned model over every test row and collect the decoded output
# (prompt + continuation) in finetuned_results_df['model_raw_op'].
n= len(test_dataset)
print(n)
ft_model_ep1.eval()  # loop-invariant, so set once instead of on every iteration
raw_outputs = []
for i in tqdm(range(n)):
  eval_prompt = generate_eval_prompt(test_dataset[i])
  model_input = tokenizer(eval_prompt, return_tensors="pt")
  # The tokenizer is created with add_eos_token=True, so the encoded prompt ends
  # with EOS. That makes the model continue after end-of-sequence and triggers
  # the "right-padding was detected" warning (pad == EOS). Drop it.
  if model_input["input_ids"][0, -1].item() == tokenizer.eos_token_id:
    model_input = {name: tensor[:, :-1] for name, tensor in model_input.items()}
  model_input = {name: tensor.to("cuda") for name, tensor in model_input.items()}
  with torch.no_grad():
    # Only the first word after the answer marker is parsed downstream, so
    # max_new_tokens could be lowered considerably to speed this loop up.
    generated = ft_model_ep1.generate(
        **model_input,
        max_new_tokens=256,
        pad_token_id=tokenizer.pad_token_id,  # was the literal 2
    )
  raw_outputs.append(tokenizer.decode(generated[0], skip_special_tokens=True))
# Build the frame once instead of growing it row by row with .loc.
finetuned_results_df = pd.DataFrame({'model_raw_op': raw_outputs})



100


  0%|          | 0/100 [00:00<?, ?it/s]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  1%|          | 1/100 [00:35<58:22, 35.38s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  2%|▏         | 2/100 [01:10<57:40, 35.31s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  3%|▎         | 3/100 [01:45<57:05, 35.31s/it]A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
  4%|▍         | 4/100 [02:21<56:40, 35.42s/it]A decoder-only architecture is being used, but right-padding was detected! For co

In [None]:
# Attach the raw generations to a pandas copy of the test rows; the column
# assignment aligns on the row index.
finetuned_results_raw = test_dataset.to_pandas()
finetuned_results_raw['model_raw_op'] = finetuned_results_df['model_raw_op']


In [None]:
import os
# Persist the raw fine-tuned generations to Drive as a tab-separated file.
raw_output_path = "/content/drive/MyDrive/Mistral_finetune/output/run1-epoch2/"
csv_filename = 'raw_finetuned_results.csv'
full_csv_path = os.path.join(raw_output_path, csv_filename)

# to_csv does not create missing parent directories, so make sure the folder exists.
os.makedirs(raw_output_path, exist_ok=True)
finetuned_results_raw.to_csv(full_csv_path, sep='\t', index=False)


In [None]:
# Display the test rows together with the raw fine-tuned generations.
finetuned_results_raw

## Load raw outputs from the fine-tuned model and extract the category
### - Can be done independently after fine-tuning; the code above does not need to be re-run

In [None]:
import os
# Reload the raw fine-tuned generations from the tab-separated file on Drive.
raw_output_path = "/content/drive/MyDrive/Mistral_finetune/output/run1-epoch2/"
# Option 1: Using os.path.join()
csv_filename = 'raw_finetuned_results.csv'
full_csv_path = os.path.join(raw_output_path, csv_filename)

# Same separator as used when the file was written.
finetuned_results_raw_ip = pd.read_csv(full_csv_path,sep='\t')
finetuned_results_raw_ip

Unnamed: 0,label,comment,parent_comment,model_raw_op
0,1,Carson: The black guy my preacher said we were...,Dr. Ben Carson surges into 2nd place in Iowa P...,You are a sarcasm detection bot for social med...
1,1,"Yeah, a rich, white WASPy *wo*man is totally d...",Ever since her listening tour in 1999 where sh...,You are a sarcasm detection bot for social med...
2,1,"Yeah, this study was obviously a plot to distr...",Don't forget BENGHAZI!!!!,You are a sarcasm detection bot for social med...
3,0,Because only sith deal in absolutes.,"Why is opposing Trump's agenda ""America hating""?",You are a sarcasm detection bot for social med...
4,0,"I'd like to read about that, is there a link?",Yeah that's sad. They're wrong about that. But...,You are a sarcasm detection bot for social med...
...,...,...,...,...
95,1,"Yes, It's a giant conspiracy to take 3 delegat...",The mainstream media is lying about this whole...,You are a sarcasm detection bot for social med...
96,1,"No, when these tribes conquered lands they ass...",Did any of these tribes ethnically cleanse and...,You are a sarcasm detection bot for social med...
97,1,You're right we should be terrified.,"Keep that sense of righteousness strong, you'l...",You are a sarcasm detection bot for social med...
98,0,Because it's an Internet bet,Only $10? Why not $100 or $500?,You are a sarcasm detection bot for social med...


In [None]:
# Display rows 10-19 of the reloaded fine-tuned results.
finetuned_results_raw_ip[10:20]

In [None]:
# Print the full raw generation for row 11; it is reused below to try out the parser.
test_ip = finetuned_results_raw_ip['model_raw_op'][11]
print(test_ip)

In [None]:
def find_first_word(text):
    """Extract the predicted category from a raw fine-tuned-model generation.

    Locates the first answer marker (">>>", newline, four spaces, then
    "Catgeory:" as emitted by the evaluation prompt, or the correctly spelled
    "Category:") and returns the first whitespace-delimited word after it.

    Args:
        text: Decoded model output (prompt plus continuation). Non-string
            values, such as NaN cells from a reloaded CSV, are tolerated.

    Returns:
        'Not Sarcasm' when the first word is 'Not'; otherwise the first word
        unchanged (e.g. 'Sarcasm'); None when text is not a string, no marker
        is present, or nothing follows the marker.
    """
    if not isinstance(text, str):
        return None

    # Accept both spellings so the parser keeps working if the prompt typo is fixed.
    markers = (">>>\n    Catgeory:", ">>>\n    Category:")
    hits = [(text.find(marker), len(marker)) for marker in markers]
    hits = [(position, length) for position, length in hits if position != -1]
    if not hits:
        return None  # Marker not found

    # Earliest marker wins: it is the one belonging to the prompt itself.
    start_index, marker_length = min(hits)
    # split() already discards leading whitespace and newlines.
    words = text[start_index + marker_length:].split()
    if not words:
        return None  # No word found

    if words[0] == 'Not':
        return 'Not Sarcasm'
    return words[0]

In [None]:
# Example usage: parse the raw generation printed above.
result = find_first_word(test_ip)
print('The output is --->',result)  # e.g. "Sarcasm" or "Not Sarcasm"; None if nothing could be parsed

The output is ---> Sarcasm


In [None]:
# Parse every raw generation into a category string (or None when parsing fails).
finetuned_results_raw_ip["predicted_category"] = finetuned_results_raw_ip["model_raw_op"].apply(find_first_word)

In [None]:
def text_to_binary(text):
  """Map a parsed category to its numeric class.

  Returns 1 for 'Sarcasm', 0 for 'Not Sarcasm' and 2 for anything else
  (including None for unparsed outputs).

  This matches the mapping used for the base-model evaluation earlier in the
  notebook. The previous version of this redefinition returned 0 for every
  non-'Sarcasm' value, which silently counted unparsed outputs as
  "Not Sarcasm" and made the fine-tuned metrics incomparable with the
  base-model ones.
  """
  if text=='Sarcasm':
    return 1
  if text=='Not Sarcasm':
    return 0
  return 2

In [None]:
# Convert the category strings to numeric classes for the metrics below.
finetuned_results_raw_ip["predicted_category_bn"] =finetuned_results_raw_ip["predicted_category"].apply(text_to_binary)

In [None]:
# Display the results frame with the parsed and numeric prediction columns.
finetuned_results_raw_ip

In [None]:
import os
# Persist the fine-tuned results, including parsed predictions, to Drive.
raw_output_path = "/content/drive/MyDrive/Mistral_finetune/output/run1-epoch2/"
csv_filename = 'processed_finetuned_results.csv'
full_csv_path = os.path.join(raw_output_path, csv_filename)

# to_csv does not create missing parent directories, so make sure the folder exists.
os.makedirs(raw_output_path, exist_ok=True)
finetuned_results_raw_ip.to_csv(full_csv_path, sep='\t', index=False)

In [None]:
from sklearn.metrics import classification_report

# Ground truth labels (0/1) and predicted classes for the fine-tuned model.
y_true = finetuned_results_raw_ip['label']
y_pred = finetuned_results_raw_ip['predicted_category_bn']

# If a class is never predicted, or a predicted class has no true samples, some
# metrics are undefined. zero_division=0 reports them as 0, the same value the
# default produces, without emitting UndefinedMetricWarning.
report = classification_report(y_true, y_pred, zero_division=0)

print(report)


              precision    recall  f1-score   support

           0       0.73      0.94      0.82        50
           1       0.92      0.66      0.77        50

    accuracy                           0.80       100
   macro avg       0.83      0.80      0.80       100
weighted avg       0.83      0.80      0.80       100

