In [1]:
import os

import datasets
import pandas as pd
import torch
import torch.nn as nn
import transformers
from peft import LoraConfig
from torch.utils.data import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from transformers import BitsAndBytesConfig
from trl import SFTTrainer

In [None]:
# Load the labelled training spreadsheet. The prompt builder further down reads the
# 'Statement', 'Label', 'Positive Reason' and 'Negative Reason' columns from it.
train_df = pd.read_excel('English_Merged.xlsx')  # Path to the training data
# Preview the first rows as the cell output.
train_df.head()

In [None]:
# Prefer the GPU whenever PyTorch reports one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(device)

In [None]:
base_model_name = "meta-llama/Llama-3.2-3B-Instruct" # Or any other model from HuggingFace

# Read the Hugging Face access token from the environment instead of pasting it into
# the notebook (tokens committed with a notebook leak easily). An unset or empty
# HF_TOKEN becomes None, which lets the Hub client use locally cached credentials
# (e.g. from `huggingface-cli login`).
hf_token = os.environ.get("HF_TOKEN") or None

# The tokenizer files live in the same gated repository as the weights, so the
# tokenizer load needs the token as well.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    token=hf_token,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# 4-bit NF4 quantization with float16 compute and no nested (double) quantization.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    token=hf_token,
    quantization_config = quant_config,
    device_map={"": 0}  # place the whole model on GPU index 0
)

In [7]:
def generate_prompt(example):
    """Build the full supervised training text (question + answer) for one row.

    Args:
        example: Mapping-like row providing the keys 'Statement', 'Label',
            'Positive Reason' and 'Negative Reason'.

    Returns:
        The question section and the answer section joined by a newline, with
        leading and trailing whitespace stripped.
    """
    question_part = (
        "### Question: Classify the following financial statement into one of the categories: "
        "sustainable or unsustainable, with a short explanation including both positive and "
        "negative reasoning. "
        f"Statement: {example['Statement']} "
    )
    answer_part = (
        f"### Answer: {example['Label']}. "
        f"Positive Reason: {example['Positive Reason']} "
        f"Negative Reason: {example['Negative Reason']}"
    )
    return f"{question_part}\n{answer_part}".strip()

In [8]:
# Render one training prompt per row (row-wise apply) and store it in the `prompt` column.
train_df = train_df.assign(prompt=train_df.apply(generate_prompt, axis=1))

In [None]:
# Convert the pandas frame into a Hugging Face Dataset for the trainer.
# `from_pandas` is the constructor intended for DataFrames (`from_dict` expects a plain
# mapping of column name -> values); `preserve_index=False` keeps the pandas index from
# being added as an extra column.
train_df = datasets.Dataset.from_pandas(train_df, preserve_index=False)
# Show the dataset summary as the cell output.
train_df

In [10]:
def formatting_prompts_func(example):
    """Collect the pre-rendered prompts of a batch into a list.

    Args:
        example: Mapping whose 'prompt' entry is an indexable sequence of texts.

    Returns:
        A new list holding every element of ``example['prompt']`` in order.
    """
    prompts = example['prompt']
    return [prompts[idx] for idx in range(len(prompts))]

In [13]:
from peft import LoraConfig

# LoRA adapter settings for causal-LM fine-tuning.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=64,               # adapter rank
    lora_alpha=16,      # LoRA scaling numerator
    lora_dropout=0.1,   # dropout applied inside the adapter layers
    bias="none",        # do not train bias terms
)

# Register the adapter on the quantized base model loaded earlier.
base_model.add_adapter(peft_config)

### Training

In [None]:
# Hyperparameters for the fine-tuning run, grouped by concern.
train_params = TrainingArguments(
    # --- run length (epoch-driven; max_steps is left at -1) ---
    num_train_epochs=40,
    max_steps=-1,
    # --- batching ---
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # --- optimizer / schedule ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # NOTE(review): with the plain "constant" schedule the warmup setting below is
    # presumably ignored; confirm whether "constant_with_warmup" was intended.
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    # --- mixed precision (both disabled) ---
    fp16=False,
    bf16=False,
    # --- logging / checkpointing ---
    logging_steps=50,
    save_strategy="no",
)

In [None]:
# Wire the adapter-equipped model, the prompt dataset and the hyperparameters into
# TRL's supervised fine-tuning trainer; `formatting_prompts_func` supplies the texts.
trainer = SFTTrainer(
    args=train_params,
    model=base_model,
    train_dataset=train_df,
    formatting_func=formatting_prompts_func,
)

In [None]:
# Run the fine-tuning loop with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Write the trained model and the tokenizer side by side into the "final_model" directory.
trainer.save_model("final_model")
tokenizer.save_pretrained("final_model")

### Evaluation

In [None]:
import torch
import transformers
import torch.nn as nn
import pandas as pd
from peft import LoraConfig
from sklearn.metrics import accuracy_score
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments

In [None]:
# Load the test split; the evaluation loop below reads its `sentence` and `label` columns.
testset = pd.read_csv("test_data_english.csv") # Path to the test data
# Preview the first rows as the cell output.
testset.head()

In [None]:
def summarize(text: str):
    """Generate a short continuation of `text` with the fine-tuned model.

    Args:
        text: Prompt that ends where the model's answer should begin.

    Returns:
        The newly generated tokens (at most 5) decoded to a string, without the
        prompt and without special tokens.
    """
    # After training the model can still be in training mode, which keeps dropout
    # (including the LoRA dropout configured for the adapter) active. Switch to eval
    # mode so predictions are not randomly perturbed.
    base_model.eval()
    # Put the inputs on the device the model actually lives on instead of assuming "cuda".
    inputs = tokenizer(text, return_token_type_ids=False, return_tensors="pt").to(base_model.device)
    with torch.inference_mode():
        # NOTE(review): `temperature` only matters when sampling is enabled in the
        # model's generation config; consider do_sample=False for reproducible scores.
        outputs = base_model.generate(**inputs, max_new_tokens=5, temperature=0.1)
    # generate() returns prompt + continuation; slice off the prompt tokens.
    answer_tokens = outputs[:, inputs.input_ids.shape[1]:]
    return tokenizer.decode(answer_tokens[0], skip_special_tokens=True)

def _label_from_output(generated: str) -> str:
    """Map raw generated text to 'unsustainable' or 'sustainable', ignoring case."""
    # Check the longer label: "sustainable" is a substring of "unsustainable".
    return "unsustainable" if "unsustainable" in generated.lower() else "sustainable"


# Running counters; "sustainable" is treated as the positive class.
correct = 0
total = 0
TP, FP, TN, FN = 0, 0, 0, 0

# Iterate positionally over the two columns so the loop does not depend on the
# frame having a default 0..n-1 index.
for sentence, label in zip(testset['sentence'], testset['label']):
    # NOTE(review): this prompt is worded differently from the one built by
    # generate_prompt for training; confirm the mismatch is intentional.
    text = f"### Question: Classify the following financial statement into one of the categories: 'sustainable' or 'unsustainable,'. Statement: {sentence} \n### Answer:"

    processed_output = summarize(text)

    # Case-insensitive match: the model may answer "Unsustainable" with a capital
    # letter, which the previous exact-case test silently counted as "sustainable".
    result = _label_from_output(processed_output)
    actual = label.strip().lower()

    if result == actual:
        correct += 1
    total += 1

    if result == "sustainable":
        if actual == "sustainable":
            TP += 1
        else:
            FP += 1
    else:
        if actual == "unsustainable":
            TN += 1
        else:
            FN += 1


In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Derive the summary metrics from the counters filled by the evaluation loop.
# Zero denominators (empty test set, no positive predictions, no positive labels)
# yield 0.0 instead of raising ZeroDivisionError.
accuracy = _safe_ratio(correct, total)
precision = _safe_ratio(TP, TP + FP)
recall = _safe_ratio(TP, TP + FN)
f1_score = _safe_ratio(2 * precision * recall, precision + recall)

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-score: {f1_score:.4f}")