In [None]:
!pip install transformers datasets peft accelerate bitsandbytes


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Base checkpoint identifier, defined once so the tokenizer load below does not
# repeat the literal.
model_name = "unsloth/Llama-3.2-1B"

# Tokenizer for the base checkpoint; used both for building the training set
# and for pushing to the Hub later in the notebook.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
from transformers import BitsAndBytesConfig

# 8-bit copy of the base checkpoint that will be wrapped with LoRA adapters.
# Quantization is requested through `quantization_config`; passing
# `load_in_8bit=True` directly to `from_pretrained` is deprecated.
model_train = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-1B",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [None]:
from transformers import BitsAndBytesConfig

# Second, untouched 8-bit copy of the base checkpoint. It is kept separate from
# `model_train` so the saved adapter can later be attached to a clean model.
# Quantization is requested through `quantization_config`; passing
# `load_in_8bit=True` directly to `from_pretrained` is deprecated.
base_model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Llama-3.2-1B",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [None]:
!pip install peft



In [None]:
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

from peft import LoraConfig, get_peft_model, TaskType


In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank-8 adapters with
# alpha 32 and 5% dropout, attached to the q_proj and v_proj modules only,
# with no bias terms trained.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj"],
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Wrap the quantized training copy with the adapters; `model` is what the
# Trainer optimizes and what is saved as the adapter later on.
model = get_peft_model(model_train, lora_config)

In [None]:
import pandas as pd

# Download the retail-banking conversations JSON straight from the Hub into a
# pandas DataFrame (converted to a `datasets.Dataset` in the next cell).
dataset = pd.read_json(
    "https://huggingface.co/datasets/oopere/RetailBanking-Conversations/resolve/main/retail_banking_dataset.json"
)

In [None]:
from datasets import Dataset

# Convert the pandas DataFrame into a `datasets.Dataset`.
# The cell rebinds `dataset`, so a plain re-run would feed a Dataset back into
# `from_pandas`; the guard makes the cell safe to execute more than once.
if not isinstance(dataset, Dataset):
    dataset = Dataset.from_pandas(dataset)


In [None]:
def tokenize(example):
  """Tokenize one conversation row as an "input: ... / output: ..." text.

  Args:
    example: A dataset row (mapping) with string fields "rol1" (the prompt
      side) and "rol2" (the reply side).

  Returns:
    Whatever the module-level `tokenizer` returns for the combined text,
    called with `max_length=512` and `truncation=True`.
  """
  # The target half must be the reply. The original used "rol1" for both
  # halves, which trains the model to echo its own input.
  # NOTE(review): assumes the reply column is named "rol2" — confirm against
  # the dataset schema.
  full_text = "input: " + example["rol1"] + "\n" + "output: " + example["rol2"]
  # Without truncation=True the tokenizer only warns about max_length and
  # falls back to a default strategy; make the truncation explicit.
  return tokenizer(full_text, max_length=512, truncation=True)

In [None]:
# Run `tokenize` over the dataset to produce the training examples.
tokenized_dataset = dataset.map(function=tokenize)

Map:   0%|          | 0/1423 [00:00<?, ? examples/s]

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [None]:
# Training configuration for the LoRA fine-tune.
# Effective batch size is 4 (per device) x 4 (accumulation steps) = 16.
trainings_args = TrainingArguments(
    output_dir="./llama_banking",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=10,
    learning_rate=1e-3,
    warmup_ratio=0.03,
    fp16=True,
    logging_steps=100,
    save_strategy="epoch",
    # Trainer cannot infer label names for a PeftModel-wrapped model and
    # otherwise falls back to an empty list (with a warning); name them here.
    label_names=["labels"],
)

In [None]:
from transformers import DataCollatorForLanguageModeling

# Batch collator for language modeling; mlm=False turns masked-LM off, which is
# the setting used for causal-LM training.
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

In [None]:
# Assemble the Trainer from the LoRA-wrapped model, the tokenized dataset,
# the language-modeling collator and the training arguments defined above.
trainer = Trainer(
    model=model,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    args=trainings_args,
)

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [None]:
# Run the fine-tuning loop. Left as the bare last expression so the returned
# TrainOutput summary is displayed as the cell's result.
trainer.train()

Step,Training Loss
100,0.8235
200,0.6813
300,0.5904
400,0.5173
500,0.4422
600,0.3785
700,0.3273
800,0.2767


TrainOutput(global_step=890, training_loss=0.4781609953119514, metrics={'train_runtime': 886.1993, 'train_samples_per_second': 16.057, 'train_steps_per_second': 1.004, 'total_flos': 5292526351073280.0, 'train_loss': 0.4781609953119514, 'epoch': 10.0})

In [None]:
# The original cell held only the incomplete identifier `devic`, which raises
# NameError and stops a Restart & Run All.
# NOTE(review): presumably a device inspection was intended — confirm.
model.device

In [None]:
# Write the PEFT-wrapped model to the relative directory "banking_adapter".
model.save_pretrained("banking_adapter")

In [None]:
from peft import PeftModel

# Attach the trained adapter to the separately loaded `base_model`.
# The path must be the directory written by model.save_pretrained("banking_adapter");
# the previous hard-coded "/content/retail_banking_adapter" is never created by
# this notebook, so a fresh run could not find it.
merged_model = PeftModel.from_pretrained(base_model, "banking_adapter")

In [None]:
# Replace the adapter-wrapped model with the result of merge_and_unload().
# NOTE(review): this rebinds `merged_model` to its own output, so the cell is
# not safe to run twice in a row — re-run the load cell above it first.
merged_model = merged_model.merge_and_unload()



In [None]:
# Upload the merged model to the Hub repository "Banking".
# NOTE(review): the notebook_login() cell appears *after* this one, so on a
# fresh kernel this presumably relies on an already-cached token — confirm, or
# run the login cell first.
merged_model.push_to_hub("Banking")

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Kanishkaran/Banking/commit/94696fdc61dfb9c19ba555a1646f41175ff38821', commit_message='Upload LlamaForCausalLM', commit_description='', oid='94696fdc61dfb9c19ba555a1646f41175ff38821', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Kanishkaran/Banking', endpoint='https://huggingface.co', repo_type='model', repo_id='Kanishkaran/Banking'), pr_revision=None, pr_num=None)

In [None]:
from huggingface_hub import notebook_login
# Show the interactive Hugging Face login widget for the Hub uploads.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Upload the tokenizer to the same "Banking" repository as the merged model.
tokenizer.push_to_hub("Banking")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Kanishkaran/Banking/commit/458f8b2ef456141f5aac4a4839e4282a13cb9270', commit_message='Upload tokenizer', commit_description='', oid='458f8b2ef456141f5aac4a4839e4282a13cb9270', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Kanishkaran/Banking', endpoint='https://huggingface.co', repo_type='model', repo_id='Kanishkaran/Banking'), pr_revision=None, pr_num=None)

In [None]:
from transformers import pipeline

In [None]:
# Generation pipeline for the "Kanishkaran/llama_merged" checkpoint, paired
# with the base tokenizer.
# NOTE(review): this repo id is not one pushed by this notebook (which uploads
# to "Banking") — presumably produced by an earlier run; confirm.
newModel = pipeline(
    model="Kanishkaran/llama_merged",
    tokenizer="unsloth/Llama-3.2-1B",
)

config.json:   0%|          | 0.00/1.37k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.50G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# Generation pipeline for the "Kanishkaran/Banking" checkpoint, paired with
# the base tokenizer.
newModel_2 = pipeline(
    model="Kanishkaran/Banking",
    tokenizer="unsloth/Llama-3.2-1B",
)

Device set to use cuda:0


In [None]:
# Generation pipeline for the untuned base checkpoint, used as the comparison
# baseline. Note that this rebinds `base_model`, which earlier in the notebook
# held the 8-bit model object rather than a pipeline.
base_model = pipeline(
    model="unsloth/Llama-3.2-1B",
    tokenizer="unsloth/Llama-3.2-1B",
)

config.json:   0%|          | 0.00/935 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/2.47G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# Sample prompt for the comparison cells. The next cell reassigns `question`,
# so this value is only used if that cell is skipped.
question = "help me generate new checkbook request"

In [None]:
# Prompt sent to the base and fine-tuned pipelines in the cells below.
question = "how can i update my phone number or email for my account"

In [None]:
# Baseline: generated text from the base pipeline for `question`
# (up to 50 new tokens, first returned sequence).
base_model(question, max_new_tokens=50)[0]["generated_text"]

'how can i update my phone number or email for my account?\nIf you have forgotten your password, please follow the instructions below.'

In [None]:
# Same prompt through the `newModel` pipeline, for side-by-side comparison
# with the baseline (up to 50 new tokens, first returned sequence).
newModel(question, max_new_tokens=50)[0]["generated_text"]

"how can i update my phone number or email for my account?\nYou can update your details via net banking or by visiting the branch. To update via net banking: 1. Login to your bank’s website/app 2. Go to 'Profile Settings' → 'Update Mobile/Email' 3."

In [None]:
# Query the `newModel_2` pipeline with a savings-options prompt
# (up to 50 new tokens, first returned sequence).
newModel_2(
    "I'm looking to explore different savings options that allow for medium-term withdrawal before maturity.",
    max_new_tokens=50,
)[0]["generated_text"]

"I'm looking to explore different savings options that allow for medium-term withdrawal before maturity. Any advice on how to approach this?\nI'm looking to explore different savings options that allow for medium-term withdrawal before maturity. Any advice on how to approach this? I'm looking to explore different savings options that allow for medium-term withdrawal before maturity."