In [None]:
!pip install -q peft transformers datasets accelerate bitsandbytes

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m57.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0m
[2K   [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.8/664.8 MB[0m [31m2.8 MB/s[0m eta [36m0:02:33[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━

In [None]:
import json
import os

import pandas as pd
import torch
from datasets import load_dataset, Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from transformers import default_data_collator

In [None]:
# Input Q&A export and the JSONL file derived from it.
QA_SOURCE_PATH = '/content/cli_qa.json'
QA_JSONL_PATH = 'qa_lm_format.jsonl'

# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(QA_SOURCE_PATH, encoding='utf-8') as f:
    data = json.load(f)

if not data:
    raise ValueError(f"No Q&A records found in {QA_SOURCE_PATH}")

# Format into instruction-following style
for index, row in enumerate(data):
    # Fail with the offending record index instead of a bare KeyError.
    missing = {'question', 'answer'} - row.keys()
    if missing:
        raise ValueError(
            f"Record {index} in {QA_SOURCE_PATH} is missing {sorted(missing)}"
        )
    row['text'] = f"### Question:\n{row['question']}\n\n### Answer:\n{row['answer']}"

# One JSON object per line, keeping only the formatted `text` field.
with open(QA_JSONL_PATH, 'w', encoding='utf-8') as f:
    for row in data:
        f.write(json.dumps({"text": row['text']}) + "\n")

# Reload the JSONL through pandas and wrap it as a `datasets.Dataset`.
df = pd.read_json(QA_JSONL_PATH, lines=True)
dataset = Dataset.from_pandas(df)

In [None]:
# Checkpoint used for both the tokenizer and the base model.
model_id = 'TinyLlama/TinyLlama_v1.1'

# Tokenizer: the end-of-sequence token doubles as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Base causal LM; device placement is delegated to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto')

# NOTE(review): the model above is loaded without any quantization config, yet
# it is passed through the k-bit preparation helper — confirm this is intended.
model = prepare_model_for_kbit_training(model)

# LoRA adapter on the q_proj / v_proj modules.
lora_config = LoraConfig(
    task_type='CAUSAL_LM',
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias='none',
    target_modules=['q_proj', 'v_proj'],
)
model = get_peft_model(model, lora_config)

In [None]:
# Tokenize dataset
MAX_LENGTH = 256  # every example is truncated / padded to this many tokens


def tokenize(example):
    """Tokenize `example['text']` and attach causal-LM training labels.

    An end-of-sequence token is appended to every text so the model has a
    "stop here" target. Labels are a copy of `input_ids` with padding
    positions (attention mask 0) set to -100 so they are ignored by the loss.
    Building labels here, rather than in `DataCollatorForLanguageModeling`,
    matters because that collator masks every token whose id equals the pad
    id; with the pad token set to the EOS token, the EOS target would be
    masked as well and the model would never be trained to stop.

    Args:
        example: mapping with a 'text' entry, either a single string or a
            list of strings (the batched form used by `dataset.map`).

    Returns:
        Mapping with the tokenizer outputs plus 'labels'. For a single string
        input the values are flat lists; for a batch they are lists of lists.
        Texts longer than MAX_LENGTH are truncated and may lose the EOS.
    """
    texts = example['text']
    is_single = isinstance(texts, str)
    batch = [texts] if is_single else list(texts)

    encoded = tokenizer(
        [text + tokenizer.eos_token for text in batch],
        truncation=True,
        padding='max_length',
        max_length=MAX_LENGTH,
    )
    # Mask by attention mask, not by token id, so the real EOS keeps its label.
    encoded['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoded['input_ids'], encoded['attention_mask'])
    ]
    if is_single:
        return {key: values[0] for key, values in encoded.items()}
    return encoded


tokenized = dataset.map(tokenize, batched=True)
# Labels are already built above; this collator only stacks features to tensors.
data_collator = default_data_collator

Map:   0%|          | 0/95 [00:00<?, ? examples/s]

In [None]:
# Training args
training_args = TrainingArguments(
    output_dir="lora-tinyllama",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    logging_dir="logs",
    logging_steps=10,
    save_strategy="no",   # no intermediate checkpoints are written
    report_to="none",     # no external experiment tracker
    # Trainer cannot infer the label column through the PEFT wrapper: it warns
    # and falls back to an empty list. Name the column explicitly.
    label_names=["labels"],
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized,
    data_collator=data_collator,
)

# Last expression of the cell: displays the returned TrainOutput.
trainer.train()

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,3.5946


TrainOutput(global_step=12, training_loss=3.5817846059799194, metrics={'train_runtime': 29.4506, 'train_samples_per_second': 3.226, 'train_steps_per_second': 0.407, 'total_flos': 151120411361280.0, 'train_loss': 3.5817846059799194, 'epoch': 1.0})

In [None]:
# Write the trained adapter and the tokenizer files into the same directory.
save_dir = "adapter/"
model.save_pretrained(save_dir)
# Last expression: displays the tuple of tokenizer files that were written.
tokenizer.save_pretrained(save_dir)

('adapter/tokenizer_config.json',
 'adapter/special_tokens_map.json',
 'adapter/tokenizer.model',
 'adapter/added_tokens.json',
 'adapter/tokenizer.json')

In [None]:
!zip -r adapter.zip /content/adapter

  adding: content/adapter/ (stored 0%)
  adding: content/adapter/special_tokens_map.json (deflated 73%)
  adding: content/adapter/tokenizer.model (deflated 55%)
  adding: content/adapter/adapter_config.json (deflated 55%)
  adding: content/adapter/adapter_model.safetensors (deflated 8%)
  adding: content/adapter/README.md (deflated 66%)
  adding: content/adapter/tokenizer_config.json (deflated 68%)
  adding: content/adapter/tokenizer.json (deflated 85%)


In [None]:
from transformers import AutoTokenizer, pipeline
from peft import AutoPeftModelForCausalLM

adapter_path = "adapter"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
if tokenizer.pad_token is None:
    # Fall back to EOS only when the saved tokenizer has no padding token.
    tokenizer.pad_token = tokenizer.eos_token

# Load the base model with the LoRA adapter applied. No quantization config is
# passed, so this is not a 4-bit load; the dtype is chosen by torch_dtype="auto".
model = AutoPeftModelForCausalLM.from_pretrained(
    adapter_path,
    device_map="auto",
    torch_dtype="auto",
)

# Load pipeline for inference
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto"
)

# Prompt: same "### Question / ### Answer" layout as the training text, with
# the answer left open for the model to complete.
prompt = "### Question:\nHow to list files in a directory?\n\n### Answer:\n"
output = generator(prompt, max_new_tokens=100)[0]["generated_text"]
print(output)


Device set to use cpu


### Question:
How to list files in a directory?

### Answer:

If you want to list files in directory, you can do something like this:

```
sudo ls

file1
file2
file3
file4
file5
```

If you want to list files in a directory, you can do something like this:

```
sudo find

file1
file2
file3
file4
file5
```

### Example:

### Answer:

If you
