In [1]:
import os
import pandas as pd
import torch
from transformers import AutoTokenizer, LlamaForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from huggingface_hub import login
from sklearn.preprocessing import LabelEncoder
from transformers import DataCollatorWithPadding
from peft import get_peft_model, LoraConfig, TaskType

TOKEN = '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'
login(TOKEN)

model_name = "meta-llama/Llama-3.2-1B"  
print(f"Loading model {model_name}...")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForSequenceClassification.from_pretrained(model_name, num_labels=7)
print("Model loaded successfully.")

if tokenizer.pad_token is None:
    if tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token
        print(f"Padding token set to {tokenizer.pad_token}.")
    else:
        raise ValueError("Both pad_token and eos_token are None. Set a padding token.")
else:
    print("Padding token already defined.")

model.config.pad_token_id = tokenizer.pad_token_id

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
print(f"Model moved to {'GPU' if device.type == 'cuda' else 'CPU'}.")

model.gradient_checkpointing_enable()

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type=TaskType.SEQ_CLS,
)

model = get_peft_model(model, lora_config)

for param in model.base_model.parameters():
    param.requires_grad = False

print("Base model parameters frozen.")

print("Loading and tokenizing dataset...")

Loading model meta-llama/Llama-3.2-1B...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at meta-llama/Llama-3.2-1B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model loaded successfully.
Padding token set to <|end_of_text|>.
Model moved to GPU.
Base model parameters frozen.
Loading and tokenizing dataset...


In [2]:
import pandas as pd
train_data = pd.read_json("/home/iiitd/LLM/dataset/meld/train.json")
valid_data = pd.read_json("/home/iiitd/LLM/dataset/meld/valid.json")
test_data = pd.read_json("/home/iiitd/LLM/dataset/meld/test.json")

In [3]:
def aplicator(index : int, df : pd.DataFrame):
    t = df.iloc[max(0,index-5):index]
    text = "Find the next sentiment of the given sequence:- \n"
    for ind, rw in t.iterrows():
        text += rw["input"] + " " + rw["target"] + '\n'
    text += df["input"][index] + " "
    return text

In [4]:
def null_aplicator(index: int, df: pd.DataFrame):
    if((index+1) % 5 != 0):
        return None
    return aplicator(index,df)

In [5]:
train_data["comb"] = train_data.apply(lambda x: aplicator(int(x.name),train_data),axis=1)
valid_data["comb"] = valid_data.apply(lambda x: aplicator(int(x.name),valid_data),axis=1)
test_data["comb"] = test_data.apply(lambda x: aplicator(int(x.name),test_data),axis=1)

In [6]:
train_data["ncomb"] = train_data.apply(lambda x: null_aplicator(int(x.name),train_data),axis=1)
valid_data["ncomb"] = valid_data.apply(lambda x: null_aplicator(int(x.name),valid_data),axis=1)
test_data["ncomb"] = test_data.apply(lambda x: null_aplicator(int(x.name),test_data),axis=1)

In [7]:
train_data_combined = train_data.dropna()
valid_data_combined = valid_data.dropna()
test_data_combined = test_data.dropna()

In [8]:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score

label_encoder = LabelEncoder()
label_encoder.fit(train_data['target'])

train_data['label'] = label_encoder.transform(train_data['target'])
valid_data['label'] = label_encoder.transform(valid_data['target'])
test_data['label'] = label_encoder.transform(test_data['target'])

train_dataset = Dataset.from_pandas(train_data)
valid_dataset = Dataset.from_pandas(valid_data)
test_dataset = Dataset.from_pandas(test_data)

def preprocess_function(examples):
    tokenized_output = tokenizer(
        examples['comb'], 
        truncation=True, 
        padding='longest',
        return_tensors='pt'
    )
    tokenized_output['label'] = examples['label']
    return tokenized_output

print("Tokenizing datasets...")
tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True)
tokenized_valid_dataset = valid_dataset.map(preprocess_function, batched=True)
tokenized_test_dataset = test_dataset.map(preprocess_function, batched=True)
print("Datasets tokenized successfully.")

tokenized_train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
tokenized_valid_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
tokenized_test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    f1_weighted = f1_score(labels, predictions, average='weighted')
    f1_macro = f1_score(labels, predictions, average='macro')
    return {
        'accuracy': acc,
        'f1_weighted': f1_weighted,
        'f1_macro': f1_macro
    }


training_args = TrainingArguments(
    output_dir='./Adaptive-Finetuned-Llama',
    per_device_train_batch_size=10,
    per_device_eval_batch_size=10,
    num_train_epochs=3,
    logging_dir='./logs',
    no_cuda=False,
    fp16=True,
)

print("Training arguments set up successfully.")

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_valid_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

print("Trainer created successfully. Starting training...")
trainer.train()
print("Training complete.")

Tokenizing datasets...


Map:   0%|          | 0/9989 [00:00<?, ? examples/s]

Map:   0%|          | 0/1109 [00:00<?, ? examples/s]

Map:   0%|          | 0/2610 [00:00<?, ? examples/s]

Datasets tokenized successfully.
Training arguments set up successfully.
Trainer created successfully. Starting training...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
500,4.3364
1000,4.3372
1500,4.3365




Training complete.


In [11]:
test_results = trainer.evaluate(tokenized_test_dataset)

In [None]:
test_results

In [12]:
for i,j in test_results.items():
  print(f'{i} : {j}')

eval_loss : 4.34068489074707
eval_accuracy : 0.05938697318007663
eval_f1_weighted : 0.015273100284119514
eval_f1_macro : 0.03061197000869658
eval_runtime : 79.7674
eval_samples_per_second : 32.72
eval_steps_per_second : 1.642
epoch : 3.0


In [13]:
model.save_pretrained('./Adaptive-Finetuned-Llama/model')
tokenizer.save_pretrained('./Adaptive-Finetuned-Llama/tokenizer')
print("Model and tokenizer saved.")
print("Training completed successfully.")

Model and tokenizer saved.
Training completed successfully.
