In [None]:
# ==============================
# 1. Setup Environment
# ==============================
!pip install -q transformers datasets peft accelerate bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install huggingface_hub



In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `hf auth whoami` to get more information or `hf auth logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: write).
The token `to

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

In [None]:
# ==============================
# 2. Load Your Dataset
# ==============================
# Upload your qa_results_alpaca.jsonl to Colab first (Files panel -> Upload)
INPUT_FILE_NAME = '/content/aplaca_op3.jsonl'
dataset = load_dataset("json", data_files=INPUT_FILE_NAME)
dataset = dataset["train"]   # select the split

print(dataset)  # sanity check

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 10164
})


In [None]:
# ==============================
# 3. Choose Base Model
# ==============================
# Pick a small model if GPU memory is limited (like 1-2B)
# Recommended options for Colab T4 / A100:
# "meta-llama/Llama-2-7b-hf"  (needs A100 or better)
# "NousResearch/Llama-2-7b-hf" (same as above)
# "mistralai/Mistral-7B-v0.1"  (lighter)
# "Qwen/Qwen1.5-1.8B"          (fits easily on Colab T4)
# We'll default to a smaller one here:

model_name = "Qwen/Qwen1.5-1.8B"

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    low_cpu_mem_usage=True
)

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/3.67G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

In [None]:
# ==============================
# 4. Prepare LoRA Config
# ==============================
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],  # for LLaMA/Qwen-style models
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)

In [None]:
# ==============================
# 5. Tokenization Function
# ==============================
# Format prompt (like Alpaca)
# Format prompt
def format_prompt(example):
    if example["input"]:
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Input:\n{example['input']}\n\n### Response:\n"
    else:
        prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n"
    return prompt

# Tokenize
def tokenize(example):
    prompt = format_prompt(example)
    full_text = prompt + example["output"]

    tokenized = tokenizer(full_text, truncation=True, padding="max_length", max_length=512)

    # Labels
    labels = tokenized["input_ids"].copy()
    prompt_len = len(tokenizer(prompt)["input_ids"])
    labels[:prompt_len] = [-100] * prompt_len  # ignore prompt tokens

    tokenized["labels"] = labels
    return tokenized


In [None]:
# ==============================
# 6. Training Arguments
# ==============================
training_args = TrainingArguments(
    output_dir="./alpaca-ft",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    num_train_epochs=3,
    fp16=True,
    save_strategy="epoch",
    logging_dir="./logs",
    logging_steps=10
)

In [None]:
# ==============================
# 7. Trainer
# ==============================

tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

trainer.train()

Map:   0%|          | 0/10164 [00:00<?, ? examples/s]

  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mphanikartcs[0m ([33mphanikartcs-home[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,6.1192
20,0.2072
30,0.0932
40,0.0957
50,0.089
60,0.0852
70,0.0981
80,0.0895
90,0.0884
100,0.0932


TrainOutput(global_step=3813, training_loss=0.09162792298085978, metrics={'train_runtime': 9466.2658, 'train_samples_per_second': 3.221, 'train_steps_per_second': 0.403, 'total_flos': 1.4320576026692813e+17, 'train_loss': 0.09162792298085978, 'epoch': 3.0})

In [None]:
# ==============================
# 8. Save Model
# ==============================
model.save_pretrained("./alpaca-ft")
tokenizer.save_pretrained("./alpaca-ft")

('./alpaca-ft/tokenizer_config.json',
 './alpaca-ft/special_tokens_map.json',
 './alpaca-ft/chat_template.jinja',
 './alpaca-ft/vocab.json',
 './alpaca-ft/merges.txt',
 './alpaca-ft/added_tokens.json',
 './alpaca-ft/tokenizer.json')

In [None]:
# ==============================
# 9. Inference Test
# ==============================
from transformers import pipeline
prompt = """### Instruction:
what is the significance of Lakshmi?

### Response:
"""

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


### Instruction:
what is the significance of Lakshmi?

### Response:
Lakshmi is the goddess of wealth and prosperity, and she is the one who is the source of all wealth and prosperity.


In [None]:
from google.colab import files
!zip -r alpaca-ft.zip ./alpaca-ft
files.download('alpaca-ft.zip')

  adding: alpaca-ft/ (stored 0%)
  adding: alpaca-ft/added_tokens.json (deflated 36%)
  adding: alpaca-ft/adapter_config.json (deflated 57%)
  adding: alpaca-ft/vocab.json (deflated 61%)
  adding: alpaca-ft/checkpoint-2542/ (stored 0%)
  adding: alpaca-ft/checkpoint-2542/adapter_config.json (deflated 57%)
  adding: alpaca-ft/checkpoint-2542/rng_state.pth (deflated 26%)
  adding: alpaca-ft/checkpoint-2542/adapter_model.safetensors (deflated 7%)
  adding: alpaca-ft/checkpoint-2542/scaler.pt (deflated 64%)
  adding: alpaca-ft/checkpoint-2542/trainer_state.json (deflated 79%)
  adding: alpaca-ft/checkpoint-2542/training_args.bin (deflated 53%)
  adding: alpaca-ft/checkpoint-2542/scheduler.pt (deflated 61%)
  adding: alpaca-ft/checkpoint-2542/optimizer.pt (deflated 9%)
  adding: alpaca-ft/checkpoint-2542/README.md (deflated 65%)
  adding: alpaca-ft/merges.txt (deflated 57%)
  adding: alpaca-ft/chat_template.jinja (deflated 46%)
  adding: alpaca-ft/adapter_model.safetensors (deflated 7%)
  a

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>