In [1]:
!pip install -q -U torch "tensorboard==2.15"

In [2]:
!pip install -q -U git+https://github.com/huggingface/trl@a3c5b7178ac4f65569975efadc97db2f3749c65e
!pip install -q -U git+https://github.com/huggingface/peft@4a1559582281fc3c9283892caea8ccef1d6f5a4f


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for trl (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for peft (pyproject.toml) ... [?25l[?25hdone


In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
!pip install -q -U transformers datasets peft trl accelerate bitsandbytes

In [5]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig,PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split


In [None]:
print(f"pytorch version {torch.__version__}")

pytorch version 2.8.0+cu126


In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"working on {device}")

working on cuda:0


In [7]:
from google.colab import drive
drive.mount('/content/drive')

filename = "/content/drive/MyDrive/data_files/all-data_fin.csv"
df = pd.read_csv(filename,
                 names=["sentiment", "text"],
                 encoding="utf-8", encoding_errors="replace")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
X_train = list()
X_test = list()
for sentiment in["positive", "neutral", "negative"]:
  train, test = train_test_split(df[df.sentiment == sentiment],
                                 train_size = 300,
                                 test_size = 300,
                                 random_state = 42
                                 )
X_train.append(train)
X_test.append(test)
X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_test = pd.concat(X_test) 

eval_idx = [idx for idx in df.index if idx not in list(train.index) + list(test.index)]
X_eval = df[df.index.isin(eval_idx)]
X_eval = (X_eval
          .groupby('sentiment', group_keys=False)
          .apply(lambda x: x.sample(n=50, random_state=10, replace=True)))
X_train = X_train.reset_index(drop=True)


def generate_prompt(data_point) :
  return f"""
           Analyze the sentiment of the news headlines enlosed in the square brackets,
           determine if it is positive, neutral or negative, and return the answer as the
           corresponding sentiment label "positive or"neutral or "negative".
           [{data_point["text"]}] = {data_point["sentiment"]}
           """.strip()
def generate_test_prompt(data_point):
  return f"""
           Analyze the sentiment of the news headline enclosed in square brackets,
           determine if it is positive, neutral, or negative, and return the answer as
           the corresponding sentiment label "positive" or "neutral" or "negative".
           [{data_point["text"]}] = """.strip()

X_train = pd.DataFrame(X_train.apply(generate_prompt, axis = 1), columns = ["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis = 1), columns = ["text"])
Y_true = X_test.sentiment
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis =1), columns = ["text"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [9]:
def evaluate(y_true, y_pred):
    labels = ['positive', 'neutral', 'negative']
    mapping = {'positive': 2, 'neutral': 1, 'none':1, 'negative': 0}
    def map_func(x):
        return mapping.get(x, 1)

    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
The token `grad_2` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `grad_2`


In [None]:
model_name = "meta-llama/Llama-2-7b-chat-hf"
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map = device,
    torch_dtype = compute_dtype,
    quantization_config = bnb_config,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code = True,)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"


config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [20]:
def predict(test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens = 1,
                        do_sample=False
                       )
        result = pipe(prompt)

        answer = result[0]['generated_text'].split("=")[-1]
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")
    return y_pred

y_pred = predict(X_test, model, tokenizer)


evaluate(Y_true,y_pred)

  0%|          | 0/300 [00:00<?, ?it/s]Device set to use cuda:0
The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  0%|          | 1/300 [00:01<06:16,  1.26s/it]Device set to use cuda:0
  1%|          | 2/300 [00:01<03:03,  1.62it/s]Device set to use cuda:0
  1%|          | 3/300 [00:01<01:55,  2.57it/s]Device set to use cuda:0
  1%|▏         | 4/300 [00:01<01:25,  3.45it/s]Device set to use cuda:0
  2%|▏         | 5/300 [00:01<01:06,  4.41it/s]Device set to use cuda:0
  2%|▏         | 6/300 [00:01<00:55,  5.32it/s]Device set to use cuda:0
  2%|▏         | 7/300 [00:02<00:54,  5.43it/s]Device set to use cuda:0
  3%|▎         | 8/300 [00:02<00:47,  6.10it/s]Device set to use cuda:0
  3%|▎         | 9/300 [00:02<00:43,  6.76it/s]Device set to use cuda:0
  3%|▎         | 10/300 [00:02<00:40,  7.19it/s]Device set to use cuda:0
  4%|▎         | 11/300 [00:02<00:40,  7.13it/s]Device set to use cuda:

Accuracy: 1.000
Accuracy for label 0: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       300

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300


Confusion Matrix:
[[300   0   0]
 [  0   0   0]
 [  0   0   0]]





In [None]:
output_dir="trained_weights"

peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0.1,
        r=64,
        bias="none",
        target_modules="all-linear",
        task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=2,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    optim="paged_adamw_32bit",
    logging_steps=25,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    eval_strategy="epoch"
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,

)

trainer.train()


Adding EOS to train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/150 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/150 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/150 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': None}.


Epoch,Training Loss,Validation Loss,Entropy,Num Tokens,Mean Token Accuracy
1,1.5905,0.872008,0.709057,29539.0,0.827159
2,0.6574,0.870023,0.710912,59078.0,0.82896


TrainOutput(global_step=76, training_loss=0.9829869599718797, metrics={'train_runtime': 429.7968, 'train_samples_per_second': 1.396, 'train_steps_per_second': 0.177, 'total_flos': 2398774089105408.0, 'train_loss': 0.9829869599718797, 'epoch': 2.0})

In [None]:
model_save_path = "/content/drive/MyDrive/data_files/model_for_sentiment"
tokenizer_save_path = "/content/drive/MyDrive/data_files/tokenizer_for_sentiment"
trainer.save_model(model_save_path)
tokenizer.save_pretrained(tokenizer_save_path)

('/content/drive/MyDrive/data_files/tokenizer_for_sentiment/tokenizer_config.json',
 '/content/drive/MyDrive/data_files/tokenizer_for_sentiment/special_tokens_map.json',
 '/content/drive/MyDrive/data_files/tokenizer_for_sentiment/chat_template.jinja',
 '/content/drive/MyDrive/data_files/tokenizer_for_sentiment/tokenizer.model',
 '/content/drive/MyDrive/data_files/tokenizer_for_sentiment/added_tokens.json',
 '/content/drive/MyDrive/data_files/tokenizer_for_sentiment/tokenizer.json')

In [17]:

import gc
del [df, X_train, X_eval]
del [TrainingArguments, SFTTrainer, LoraConfig, BitsAndBytesConfig]
torch.cuda.empty_cache()
gc.collect()


810

In [18]:
from peft import AutoPeftModelForCausalLM
finetuned_model_path = "/content/drive/MyDrive/data_files/model_for_sentiment/"
model = AutoPeftModelForCausalLM.from_pretrained(
     finetuned_model_path,
     torch_dtype=torch.float16,
     return_dict=True,
     low_cpu_mem_usage=True,
     device_map=device,
)

merged_model = model.merge_and_unload()
tokenizer = AutoTokenizer.from_pretrained("/content/drive/MyDrive/data_files/tokenizer_for_sentiment")
merged_model.save_pretrained("/content/drive/MyDrive/data_files/merged_model_for_sentiment", safe_serialization=True, max_shard_size="2GB")
tokenizer.save_pretrained("/content/drive/MyDrive/data_files/merged_model_for_sentiment")


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

('/content/drive/MyDrive/data_files/merged_model_for_sentiment/tokenizer_config.json',
 '/content/drive/MyDrive/data_files/merged_model_for_sentiment/special_tokens_map.json',
 '/content/drive/MyDrive/data_files/merged_model_for_sentiment/chat_template.jinja',
 '/content/drive/MyDrive/data_files/merged_model_for_sentiment/tokenizer.model',
 '/content/drive/MyDrive/data_files/merged_model_for_sentiment/added_tokens.json',
 '/content/drive/MyDrive/data_files/merged_model_for_sentiment/tokenizer.json')

In [21]:
y_pred = predict(X_test, merged_model, tokenizer)
evaluate(Y_true,y_pred)

  0%|          | 0/300 [00:00<?, ?it/s]Device set to use cuda:0
  0%|          | 1/300 [00:00<00:38,  7.72it/s]Device set to use cuda:0
  1%|          | 2/300 [00:00<00:47,  6.31it/s]Device set to use cuda:0
  1%|          | 3/300 [00:00<00:40,  7.34it/s]Device set to use cuda:0
  1%|▏         | 4/300 [00:00<00:36,  8.10it/s]Device set to use cuda:0
  2%|▏         | 5/300 [00:00<00:34,  8.58it/s]Device set to use cuda:0
  2%|▏         | 6/300 [00:00<00:32,  8.97it/s]Device set to use cuda:0
  2%|▏         | 7/300 [00:00<00:36,  7.93it/s]Device set to use cuda:0
  3%|▎         | 8/300 [00:00<00:34,  8.43it/s]Device set to use cuda:0
  3%|▎         | 9/300 [00:01<00:33,  8.69it/s]Device set to use cuda:0
  3%|▎         | 10/300 [00:01<00:33,  8.60it/s]Device set to use cuda:0
  4%|▎         | 11/300 [00:01<00:33,  8.65it/s]Device set to use cuda:0
  4%|▍         | 12/300 [00:01<00:33,  8.65it/s]Device set to use cuda:0
  4%|▍         | 13/300 [00:01<00:37,  7.72it/s]Device set to use cud

Accuracy: 1.000
Accuracy for label 0: 1.000

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       300

    accuracy                           1.00       300
   macro avg       1.00      1.00      1.00       300
weighted avg       1.00      1.00      1.00       300


Confusion Matrix:
[[300   0   0]
 [  0   0   0]
 [  0   0   0]]





In [22]:

evaluation = pd.DataFrame({'text': X_test["text"],
                           'y_true':Y_true,
                           'y_pred': y_pred},
                         )
evaluation.to_csv("/content/drive/MyDrive/data_files/test_predictions.csv", index=False)