In [1]:
import pandas as pd

In [2]:
df_train = pd.read_csv("sData/df_train.csv")
df_train['Label'] = df_train['Label'].replace({0.0 : 'Negative', 1.0 : 'Positive', 2.0 : 'Neutral'})
df_train.head()

Unnamed: 0,Comment,Label
0,"সিনেমাটি একবার দেখার মতো, দ্বিতীয়বার দেখার ইচ্...",Neutral
1,"তুফান কেন মানুষ না, পশু না, রাক্ষস, সেই গল্প ন...",Positive
2,"পরিচালক যদি আরও উন্নত গল্প নিয়ে আসতেন, তবে আর...",Negative
3,"চঞ্চল চৌধুরী ও জয়া আহসান ভালো অভিনয় করেছেন, কি...",Neutral
4,"আমি মনে করি, আরও দশ-পনের মিনিট ছোট করা যেত।",Negative


In [3]:
df_val = pd.read_csv("sData/df_valid.csv")
df_val['Label'] = df_val['Label'].replace({0.0 : 'Negative', 1.0 : 'Positive', 2.0 : 'Neutral'})
df_val.head()


Unnamed: 0,Comment,Label
0,মুজিবের ঘন গোঁফের সঠিক রূপ ফুটিয়ে তুলতে পারেনি...,Negative
1,নতুন জুটি হিসেবে সিয়াম আর পূজার অভিনয় মনোমুগ...,Positive
2,আর অভিনেতা হিসাবে চঞ্চাল চৌধুরী একদম ১০০ তে ১০...,Positive
3,সিনেমাটি খুবই মসৃণ ছিল না। শেষ পর্যন্ত আশা ছিল...,Negative
4,"ভালো চেষ্টার পরেও নির্মাণটা জোরালো হয়নি, কয়ে...",Negative


In [4]:
df_test = pd.read_csv("sData/df_test.csv")
df_test['Label'] = df_test['Label'].replace({0.0 : 'Negative', 1.0 : 'Positive', 2.0 : 'Neutral'})
df_test.head()

Unnamed: 0,Comment,Label
0,"চরিত্রের মধ্যে গভীরতা কম ছিল, আরো ইমোশনাল কন্ট...",Negative
1,"গল্পটি যে সমাজের জন্য কতটা নেতিবাচক, সেটা ভাবল...",Negative
2,"ট্রেলার দেখে মনে হচ্ছে, পোস্ট প্রোডাকশন এখনও ব...",Positive
3,"খুব ভালো লাগেনি, তবে পুরোপুরি খারাপও ছিল না।",Neutral
4,"আয়নাবাজি বেশ ভালো ছিল, তবে কাহিনী কিছুটা পূর্...",Neutral


In [5]:
print(f"Training Data size : {df_train.shape}")
print(f"Validation Data size : {df_val.shape}")
print(f"Testing Data size : {df_test.shape}")

Training Data size : (7200, 2)
Validation Data size : (900, 2)
Testing Data size : (900, 2)


In [6]:
X_train = df_train
X_eval = df_val
X_test = df_val

def generate_prompt(data_point):
    return f"""
           Classify the text into Neutral, Positive, and Negative, and return the answer as the corresponding text label.
text: {data_point["Comment"]}
label: {data_point["Label"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Classify the text into Neutral, Positive, and Negative, and return the answer as the corresponding text label.
text: {data_point["Comment"]}
label: """.strip()

X_train.loc[:,'Comment'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'Comment'] = X_eval.apply(generate_prompt, axis=1)

y_true = X_test.loc[:,'Label']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["Comment"])

In [7]:
X_train.Label.value_counts()

Label
Positive    2427
Negative    2387
Neutral     2386
Name: count, dtype: int64

In [8]:
y_true.value_counts()

Label
Negative    314
Neutral     309
Positive    277
Name: count, dtype: int64

In [9]:
from datasets import Dataset

train_data = Dataset.from_pandas(X_train[["Comment"]])
eval_data = Dataset.from_pandas(X_eval[["Comment"]])

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

In [11]:
model_name = "meta-llama/Llama-3.2-1B"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

model.config.use_cache = False
model.config.pretraining_tp = 1

In [12]:
from transformers import AutoTokenizer

In [13]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id

In [14]:
from transformers import pipeline
from tqdm import tqdm

In [15]:
def predict(test, model, tokenizer):
    y_pred = []
    categories = ["Neutral", "Positive", "Negative"]
    
    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["Comment"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer, 
                        max_new_tokens=2, 
                        temperature=0.1)
        
        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()
        
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")
    
    return y_pred

In [17]:
model = model.to('cuda')
# tokenizer = tokenizer.to('cuda')

In [18]:
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/900 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/900 [00:01<24:18,  1.62s/it]Device set to use cuda:0
  0%|          | 2/900 [00:02<14:51,  1.01it/s]Device set to use cuda:0
  0%|          | 3/900 [00:02<12:26,  1.20it/s]Device set to use cuda:0
  0%|          | 4/900 [00:03<11:21,  1.32it/s]Device set to use cuda:0
  1%|          | 5/900 [00:04<10:40,  1.40it/s]Device set to use cuda:0
  1%|          | 6/900 [00:04<10:17,  1.45it/s]Device set to use cuda:0
  1%|          | 7/900 [00:05<09:53,  1.51it/s]Device set to use cuda:0
  1%|          | 8/900 [00:06<09:51,  1.51it/s]Device set to use cuda:0
  1%|          | 9/900 [00:06<09:59,  1.49it/s]Device set to use cuda:0
  1%|          | 10/900 [00:07<09:02,  1.64it/s]Device set to use cuda:0
  1%|          | 11/900 [00:07<09:11,  1.61it/s]Device set to use cuda:0
  1%|▏         | 12/900 [00:08<09:17,  1.59it/s]Device set to use cuda:0
  1%|▏         | 13/900 [00:09<09:26,  1.56it/s]Device set to use cud

In [19]:
import numpy as np
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [20]:
def evaluate(y_true, y_pred):
    labels = ["Neutral", "Positive", "Negative"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    
    def map_func(x):
        return mapping.get(x, -1)
    
    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)
    
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')
    
    unique_labels = set(y_true_mapped)
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')
        
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)
    
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)
    # plt.figure(figsize=(8, 6))
    # sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=y_true_mapped, yticklabels=y_pred_mapped)
    # plt.title('Confusion Matrix')
    # plt.xlabel('Predicted Labels')
    # plt.ylabel('True Labels')
    # plt.show()

In [21]:
evaluate(y_true, y_pred)

Accuracy: 0.013
Accuracy for label Neutral: 0.000
Accuracy for label Positive: 0.007
Accuracy for label Negative: 0.032

Classification Report:
              precision    recall  f1-score   support

     Neutral       0.00      0.00      0.00       309
    Positive       0.00      0.01      0.00       277
    Negative       0.09      0.03      0.05       314

   micro avg       0.01      0.01      0.01       900
   macro avg       0.03      0.01      0.02       900
weighted avg       0.03      0.01      0.02       900


Confusion Matrix:
[[  0 309   0]
 [162   2 100]
 [  6 293  10]]


In [22]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [23]:
modules = find_all_linear_names(model)
modules

['q_proj', 'gate_proj', 'k_proj', 'up_proj', 'v_proj', 'o_proj', 'down_proj']

In [24]:
print(train_data.column_names)
print(eval_data.column_names)

['Comment']
['Comment']


In [25]:
train_data = train_data.rename_column("Comment", "text")
eval_data = eval_data.rename_column("Comment", "text")

In [26]:
from peft import LoraConfig
from transformers import TrainingArguments
from trl import SFTTrainer

In [27]:
output_dir="FineTuned-Llama-3.2-1B"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=output_dir,            
    num_train_epochs=1,             
    per_device_train_batch_size=1,        
    gradient_accumulation_steps=8,    
    gradient_checkpointing=True,         
    optim="paged_adamw_32bit",
    logging_steps=1,                         
    learning_rate=2e-4,        
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,          
    max_steps=-1,
    warmup_ratio=0.03,             
    group_by_length=False,
    lr_scheduler_type="cosine",       
    report_to="wandb",            
    eval_strategy="steps",              
    eval_steps = 0.2,
    no_cuda=False
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    # dataset_text_field="text",
    # dataset_text_field="Comment",
    tokenizer=tokenizer,
    # max_seq_length=512,
    # packing=False,
    # dataset_kwargs={
    # "add_special_tokens": False,
    # "append_concat_token": False,
    # }
)

  trainer = SFTTrainer(
Map: 100%|██████████| 7200/7200 [00:00<00:00, 23275.29 examples/s]
Map: 100%|██████████| 900/900 [00:00<00:00, 3106.16 examples/s]


In [28]:
trainer.train()

[34m[1mwandb[0m: Currently logged in as: [33mpramanik-souvik[0m ([33mpramanik-souvik-north-south-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


  return fn(*args, **kwargs)


Step,Training Loss,Validation Loss
180,0.7821,0.62706
360,0.5052,0.583024
540,0.5325,0.55272
720,0.4495,0.535406
900,0.4604,0.531524


  return fn(*args, **kwargs)


TrainOutput(global_step=900, training_loss=0.5981219990054766, metrics={'train_runtime': 7722.4106, 'train_samples_per_second': 0.932, 'train_steps_per_second': 0.117, 'total_flos': 4718186817576960.0, 'train_loss': 0.5981219990054766, 'epoch': 1.0})

In [33]:
import wandb

In [34]:
wandb.finish()
model.config.use_cache = True

0,1
eval/loss,█▅▃▁▁
eval/runtime,▁▃█▁▅
eval/samples_per_second,█▆▁█▄
eval/steps_per_second,▇▆▁█▄
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▇▇▇▇▇▇█████
train/global_step,▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
train/grad_norm,█▇▆▅▆▄▂▂▃▅▃▁▄▃▁▂▁▂▁▂▃▃▂▂▃▃▁▁▁▂▂▁▂▃▂▂▁▁▁▁
train/learning_rate,▂▄▄███████▇▇▇▇▇▆▆▆▆▆▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▁▁▁▁▁
train/loss,▇█▃▄▅▅▄▄▃▄▄▃▄▃▃▄█▂▄▁▃▃▃▅▃▃▄▃▃▄▃▂▂▆▂▄▃▄▁▁

0,1
eval/loss,0.53152
eval/runtime,216.0307
eval/samples_per_second,4.166
eval/steps_per_second,0.523
total_flos,4718186817576960.0
train/epoch,1.0
train/global_step,900.0
train/grad_norm,0.31791
train/learning_rate,0.0
train/loss,0.4604


In [35]:
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('FineTuned-Llama-3.2-1B\\tokenizer_config.json',
 'FineTuned-Llama-3.2-1B\\special_tokens_map.json',
 'FineTuned-Llama-3.2-1B\\tokenizer.json')

In [36]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

  0%|          | 0/900 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/900 [00:06<1:40:06,  6.68s/it]Device set to use cuda:0
Device set to use cuda:0
  0%|          | 3/900 [00:06<26:50,  1.80s/it]  Device set to use cuda:0
Device set to use cuda:0
  1%|          | 5/900 [00:07<13:42,  1.09it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|          | 7/900 [00:07<08:29,  1.75it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|          | 9/900 [00:07<05:46,  2.57it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|          | 11/900 [00:07<04:14,  3.49it/s]Device set to use cuda:0
Device set to use cuda:0
  1%|▏         | 13/900 [00:14<19:56,  1.35s/it]Device set to use cuda:0
  2%|▏         | 14/900 [00:14<16:22,  1.11s/it]Device set to use cuda:0
Device set to use cuda:0
  2%|▏         | 16/900 [00:14<11:02,  1.33it/s]Device set to use cuda:0
  2%|▏         | 17/900 [00:14<09:07,  1.61it/s]Device set to use cuda:0
  2%|▏         | 18/900 [00:20<2

Accuracy: 0.328
Accuracy for label Neutral: 0.000
Accuracy for label Positive: 0.679
Accuracy for label Negative: 0.341

Classification Report:
              precision    recall  f1-score   support

     Neutral       0.00      0.00      0.00       309
    Positive       0.28      0.68      0.39       277
    Negative       0.63      0.34      0.44       314

   micro avg       0.33      0.33      0.33       900
   macro avg       0.30      0.34      0.28       900
weighted avg       0.30      0.33      0.28       900


Confusion Matrix:
[[  0 309   0]
 [ 24 188  64]
 [ 27 180 107]]



