In [3]:
%%capture
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth

In [4]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 2x faster
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # 4bit for 405b!
    "unsloth/Mistral-Small-Instruct-2409",     # Mistral 22b 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!

    "unsloth/Llama-3.2-1B-bnb-4bit",           # NEW! Llama 3.2 models
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit", # or choose "unsloth/Llama-3.2-1B"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.5k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [5]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.2.15 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
import pandas as pd
data_main=pd.read_csv('/content/eng_train_final.csv')
data_train=pd.read_csv('/content/SentencesExplanations.csv')

In [9]:
df=data_train

In [10]:
df

Unnamed: 0,Sentence,Explanation
0,But not very happy.,"The speaker is expressing dissatisfaction, but..."
1,Well she's not gon na last the whole song like...,The speaker describes improvising to assist du...
2,She sat at her Papa's recliner sofa only to mo...,The behavior indicates a search for comfort or...
3,"Yes, the Oklahoma city bombing.",The reference to a tragic historical event lik...
4,They were dancing to Bolero.,The mention of Bolero suggests a setting of gr...
...,...,...
145,And these were brutal wounds.,The speaker emphasizes the severity of their i...
146,under my skin the itch is unscratched.,The speaker metaphorically describes unresolve...
147,"She laughs, because she's so very nervous.","The speaker describes nervous laughter, which ..."
148,I make it a habit of sitting with my back faci...,The speaker describes a habitual behavior like...


In [11]:
import pandas as pd
from datasets import Dataset

# Example DataFrame
# Assuming you already have a DataFrame `df` with columns "text" and "output" (which contains the explanation)

# Define the instruction for generating explanations
instruction = (
    "Read the given text and generate a short explanation of the emotional or situational context "
    "behind the sentence. The explanation should be concise and relevant to the sentence. "
    "Do not explicitly mention emotions but focus on the implications behind the sentence."
)


# Prepare the dataset for fine-tuning with the appropriate prompt format
alpaca_prompt = """
### Instruction:
{}
### Input:
{}
### Response:
{}"""

EOS_TOKEN = "<|endoftext|>"  # Replace with the tokenizer's EOS token if necessary

# Create the prompts
df["formatted_prompt"] = df.apply(
    lambda row: alpaca_prompt.format(instruction, row["Sentence"], row["Explanation"]) + EOS_TOKEN, axis=1
)

# Convert to Hugging Face dataset
dataset = Dataset.from_pandas(df[["formatted_prompt"]])

# The dataset is now ready for fine-tuning


In [12]:
!export WANDB_MODE=disabled

In [13]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "formatted_prompt",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 30,
        learning_rate = 1e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to='none',
    ),
)

Converting train dataset to ChatML (num_proc=2):   0%|          | 0/150 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=2):   0%|          | 0/150 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/150 [00:00<?, ? examples/s]

Truncating train dataset (num_proc=2):   0%|          | 0/150 [00:00<?, ? examples/s]

In [14]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
5.516 GB of memory reserved.


In [15]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 150 | Num Epochs = 2
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 30
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,3.1579
2,3.2215
3,3.0969
4,2.9972
5,2.8402
6,2.7804
7,2.509
8,2.2849
9,2.0789
10,1.827


In [None]:
# #@title Show final memory and time stats
# used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
# used_percentage = round(used_memory         /max_memory*100, 3)
# lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
# print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
# print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
# print(f"Peak reserved memory = {used_memory} GB.")
# print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
# print(f"Peak reserved memory % of max memory = {used_percentage} %.")
# print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

156.0758 seconds used for training.
2.6 minutes used for training.
Peak reserved memory = 6.252 GB.
Peak reserved memory for training = 0.736 GB.
Peak reserved memory % of max memory = 42.412 %.
Peak reserved memory for training % of max memory = 4.993 %.


In [None]:
# model.save_pretrained("lora_model_explanation_generation") # Local saving
# tokenizer.save_pretrained("lora_model_explanation_generation")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('lora_model_explanation_generation/tokenizer_config.json',
 'lora_model_explanation_generation/special_tokens_map.json',
 'lora_model_explanation_generation/tokenizer.json')

In [None]:
# from unsloth import FastLanguageModel
# import torch

# # Model parameters
# model_name = "/kaggle/working/lora_model_explanation_generation"  # Update with your model's name
# from unsloth import FastLanguageModel
# model, tokenizer = FastLanguageModel.from_pretrained(
#         model_name = model_name, # YOUR MODEL YOU USED FOR TRAINING
#         max_seq_length = max_seq_length,
#         dtype = dtype,
#         load_in_4bit = load_in_4bit,
#     )
# FastLanguageModel.for_inference(model) # Enable native 2x faster inference

In [16]:
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lor

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader, Dataset
import torch

# Define the instruction for generating explanations
instruction = (
    "Read the given text and generate a short explanation of the emotional or situational context "
    "behind the sentence. The explanation should be concise and relevant to the sentence. "
    "Do not explicitly mention emotions but focus on the implications behind the sentence."
)

# Define the Alpaca-style prompt
alpaca_prompt = """
### Instruction:
{}
### Input:
{}

### Response:
"""

# Load your new sentences from a CSV file
df_test = pd.read_csv('/kaggle/input/emotion-dataset/eng_test_final.csv')
df_test = df_test

# Custom dataset to handle input prompts
class SentenceDataset(Dataset):
    def __init__(self, dataframe):
        self.texts = dataframe["text"].tolist()

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return alpaca_prompt.format(instruction, self.texts[idx])

# Create the dataset and dataloader
batch_size = 16
dataset = SentenceDataset(df_test)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)


# Function to generate explanations for a batch
def generate_explanations_batch(batch):
    # Tokenize the batch
    inputs = tokenizer(
        batch,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to("cuda")
    # Disable gradient computation for faster inference
        # Generate outputs
    outputs = model.generate(  # Directly use the model without DataParallel
    input_ids = inputs.input_ids, attention_mask = inputs.attention_mask,
        max_new_tokens=150,
        use_cache=True,
    )

    # Decode the generated outputs
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Extract explanations
    explanations = []
    for decoded_output in decoded_outputs:
        response_start = decoded_output.find("### Response:") + len("### Response:")
        response_end = decoded_output.find("<|endoftext|>")
        response = decoded_output[response_start:response_end].strip()
        explanations.append(response)

    return explanations

# Generate explanations and update the DataFrame
generated_explanations = []
for batch in tqdm(dataloader, desc="Generating Explanations"):
    explanations = generate_explanations_batch(batch)
    generated_explanations.extend(explanations)

# Add explanations to the DataFrame
df_test["generated_explanation"] = generated_explanations

# Print the resulting DataFrame
print(df_test[["text", "generated_explanation"]])


Generating Explanations: 100%|██████████| 173/173 [2:01:03<00:00, 41.99s/it] 

                                                   text  \
0     / o \ So today I went in for a new exam with D...   
1     The image I have in my mind is this: a group o...   
2     I slammed my fist against the door and yelled,...   
3                          I could not unbend my knees.   
4     I spent the night at the hotel, mostly hanging...   
...                                                 ...   
2762                            Better late then never!   
2763  In the last three weeks, I have started lookin...   
2764      But I never fell out, so it wasn't a problem.   
2765  " So I will remain positive for as long as I l...   
2766                          `` Bella my head is fine.   

                                  generated_explanation  
0     The speaker describes a stressful situation, l...  
1     The speaker describes a metaphorical emotional...  
2     The speaker's frustration or desperation promp...  
3     The speaker describes a physical limitation, p...  
4




In [None]:
df_test.to_csv('df_test_with_explanation.csv',index=False)