# ***IMPORTING***

In [None]:
!pip install transformers accelerate datasets peft bitsandbytes evaluate

# ***Libraries***

In [2]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# ***DATASET***

In [3]:
from datasets import load_dataset

ds = load_dataset("alespalla/chatbot_instruction_prompts")

train_ds = ds["train"].shuffle(seed=42).select(range(10000))   # first 10k examples
test_ds = ds["test"].shuffle(seed=42).select(range(2000))      # first 2k examples

def format_example(e):
    return {
        "text": f"### Instruction:\n{e['prompt']}\n\n### Response:\n{e['response']}"
    }

train_ds = train_ds.map(format_example)
test_ds = test_ds.map(format_example)

print(train_ds[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


{'response': 'Over two million people died at Nagasaki and Hiroshima. Not exactly the hilarious fodder for making jokes.', 'prompt': 'It is called the bomb.', 'text': '### Instruction:\nIt is called the bomb.\n\n### Response:\nOver two million people died at Nagasaki and Hiroshima. Not exactly the hilarious fodder for making jokes.'}


# ***Tokenizer***

In [4]:
MODEL_NAME = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token # A: [101, 102, 2] , B: [101, 2,     2] --> 2 is <end> token to match the len
tokenizer.padding_side = "right"

# ***Load Model with 8-bit + LoRA Prep***

In [5]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    load_in_8bit=True,
    device_map="auto",
    torch_dtype=torch.float16
)

model = prepare_model_for_kbit_training(model)

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


# ***Apply LoRA Configuration***

In [6]:
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["c_attn"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 294,912 || all params: 124,734,720 || trainable%: 0.2364


# ***Tokenization***

In [7]:
def tokenize_fn(batch):
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=256  # shorter for faster training
    )

# Tokenize the subset datasets
tokenized_train = train_ds.map(
    tokenize_fn,
    batched=True,
    remove_columns=train_ds.column_names  # remove only columns in subset
)

tokenized_test = test_ds.map(
    tokenize_fn,
    batched=True,
    remove_columns=test_ds.column_names
)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

# ***Data Collator***

In [8]:
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# ***evaluate***

In [9]:
import evaluate
import numpy as np

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# ***Training Arguments***

In [13]:
training_args = TrainingArguments(
    output_dir="/content/support-bot-gpt2",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    fp16=True,
    save_steps=1000,
    logging_steps=50,
    eval_steps=1000,
    save_total_limit=2,
    load_best_model_at_end=False,
    report_to="none"
)

# ***Trainer Setup***

In [14]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,  # use tokenized subset
    eval_dataset=tokenized_test,    # use tokenized subset
    data_collator=data_collator,
    compute_metrics=compute_metrics
)

In [15]:
trainer.train()

  return fn(*args, **kwargs)


Step,Training Loss
50,2.5057
100,2.5125
150,2.4469
200,2.4562
250,2.4678
300,2.4649
350,2.4904
400,2.4922
450,2.4218
500,2.4435


  return fn(*args, **kwargs)


TrainOutput(global_step=1250, training_loss=2.4492092041015625, metrics={'train_runtime': 814.9748, 'train_samples_per_second': 12.27, 'train_steps_per_second': 1.534, 'total_flos': 1310990008320000.0, 'train_loss': 2.4492092041015625, 'epoch': 1.0})

# ***SAVING***

In [20]:
OUTPUT_DIR = "/content/support-bot-gpt2"

trainer.save_model(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print("Training complete! Model saved to:", OUTPUT_DIR)

Training complete! Model saved to: /content/support-bot-gpt2


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# ***Inference***

In [24]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load your fine-tuned model
OUTPUT_DIR = "/content/support-bot-gpt2"  # Folder where your model was saved

print("ðŸ“¥ Loading fine-tuned model for inference...")
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
model = AutoModelForCausalLM.from_pretrained(OUTPUT_DIR, device_map="auto", torch_dtype=torch.float16)
model.eval()

# Function for generating a response
def generate_response(prompt, max_length=200, temperature=0.7, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
          **inputs,
          max_length=200,
          temperature=0.8,  # slightly higher randomness
          top_p=0.95,       # nucleus sampling
          do_sample=True,
          pad_token_id=tokenizer.eos_token_id,
          repetition_penalty=1.2  # discourages repetition
      )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Remove prompt from output
    return response[len(prompt):].strip()

# Example usage
prompt = "Answer this customer support question professionally and helpfully:\nHow do I return an item?"
response = generate_response(prompt)
print("ðŸŸ¢ Response:\n", response)

ðŸ“¥ Loading fine-tuned model for inference...
ðŸŸ¢ Response:
 This is a simple process. You can use your credit or debit card to deposit the purchase price into a bank account (like Chase) for easy access, such as with Credit Cards CheckoutsÂ® services . These deposits are then returned at checkout if you leave without paying any money within 30 days. For more information, contact customerservices@mycreditcard.com on 01-8661 038402337 in order not miss out on one of the many great savings opportunities that comes from using my own online shopping app!

 - Your browser does some processing when visiting MyCreditCard.com; it may take up too much space temporarily so please be patient while checking all relevant facts about each transaction ! The amount received will depend on where exactly they came through but depends on which merchants' website accounts have been activated : PayPal , KwikPaymentServiceÂ®, WPPY Services Inc. & V
