# 🚀 Fine-Tune TinyLlama-1.1B with QLoRA on Support Chat Data

In [3]:
!pip install -q bitsandbytes accelerate datasets loralib peft transformers trl


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m366.3/366.3 kB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m45.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import torch
from datasets import Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training


In [5]:
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
model.config.use_cache = False


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.29k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

In [6]:
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, lora_config)


In [7]:
import pandas as pd
from datasets import Dataset

# Step 1: Read from output.csv
df = pd.read_csv("Mental_Health_FAQ.csv")  # Make sure 'question' and 'answer' columns are present

# Step 2: Convert DataFrame to Hugging Face Dataset
dataset = Dataset.from_pandas(df)

# Step 3: Define formatting function
def format_instruction(example):
    return f"### Instruction:\n{example['Instruction']}\n\n### Response:\n{example['Response']}"

# Step 4: Apply formatting
dataset = dataset.map(lambda x: {"text": format_instruction(x)})

# Optional: Preview
print(dataset[0])


Map:   0%|          | 0/98 [00:00<?, ? examples/s]

{'Question_ID': 1590140, 'Instruction': 'What does it mean to have a mental illness?', 'Response': 'Mental illnesses are health conditions that disrupt a personâ€™s thoughts, emotions, relationships, and daily functioning. They are associated with distress and diminished capacity to engage in the ordinary activities of daily life.\nMental illnesses fall along a continuum of severity: some are fairly mild and only interfere with some aspects of life, such as certain phobias. On the other end of the spectrum lie serious mental illnesses, which result in major functional impairment and interference with daily life. These include such disorders as major depression, schizophrenia, and bipolar disorder, and may require that the person receives care in a hospital.\nIt is important to know that mental illnesses are medical conditions that have nothing to do with a personâ€™s character, intelligence, or willpower. Just as diabetes is a disorder of the pancreas, mental illness is a medical condi

In [8]:
def tokenize_function(example):
    tokenized = tokenizer(example["text"], truncation=True, padding="max_length", max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

tokenized_dataset = dataset.map(tokenize_function, batched=True)


Map:   0%|          | 0/98 [00:00<?, ? examples/s]

In [9]:
training_args = TrainingArguments(
    output_dir="./tinyllama-qlora-support-bot",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    logging_dir="./logs",
    num_train_epochs=15,
    logging_steps=10,
    save_total_limit=2,
    save_strategy="epoch",
    bf16=True,
    optim="paged_adamw_8bit"
)


In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m22ad037[0m ([33m22ad037-muthoot-institute-of-technology-and-science[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  return fn(*args, **kwargs)


Step,Training Loss
10,5.0038
20,1.5137
30,1.2101
40,1.0796
50,1.0527
60,1.108
70,1.1761
80,1.0287
90,1.0219
100,0.9916


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


TrainOutput(global_step=195, training_loss=1.2511436658027844, metrics={'train_runtime': 2539.9039, 'train_samples_per_second': 0.579, 'train_steps_per_second': 0.077, 'total_flos': 4676779046338560.0, 'train_loss': 1.2511436658027844, 'epoch': 15.0})

In [11]:
model.save_pretrained("tinyllama-qlora-support-bot")
tokenizer.save_pretrained("tinyllama-qlora-support-bot")


('tinyllama-qlora-support-bot/tokenizer_config.json',
 'tinyllama-qlora-support-bot/special_tokens_map.json',
 'tinyllama-qlora-support-bot/chat_template.jinja',
 'tinyllama-qlora-support-bot/tokenizer.model',
 'tinyllama-qlora-support-bot/added_tokens.json',
 'tinyllama-qlora-support-bot/tokenizer.json')

In [12]:
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

instruction = "What does it mean to have a mental illness?"
prompt = f"### Instruction:\n{instruction}\n\n### Response:\n"

output = pipe(prompt, max_new_tokens=100)
print(output[0]['generated_text'])


Device set to use cuda:0
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


### Instruction:
What does it mean to have a mental illness?

### Response:
A mental illness is a disease or difficulty in thinking, feeling, or behaving that interferes with or impairs your ability to carry out daily activities, functions, or relationships. Mental illnesses include depression, bipolar disorder, anxiety disorder, personality disorder, schizophrenia, eating disorders, and other conditions.


In [13]:
import shutil
shutil.make_archive("tinyllama-qlora-support-bot", 'zip', "tinyllama-qlora-support-bot")


'/content/tinyllama-qlora-support-bot.zip'

In [14]:
from google.colab import files

# Download the zipped model to your local system
files.download("tinyllama-qlora-support-bot.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [15]:
pip install -q gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.3/323.3 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.5/11.5 MB[0m [31m117.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [16]:
# Mount your Google Drive (optional)
# from google.colab import drive
# drive.mount('/content/drive')

# Unzip your fine-tuned model
!unzip -o tinyllama-qlora-support-bot.zip -d fine_tuned_model


Archive:  tinyllama-qlora-support-bot.zip
   creating: fine_tuned_model/checkpoint-182/
   creating: fine_tuned_model/checkpoint-195/
  inflating: fine_tuned_model/special_tokens_map.json  
  inflating: fine_tuned_model/adapter_model.safetensors  
  inflating: fine_tuned_model/tokenizer.model  
  inflating: fine_tuned_model/chat_template.jinja  
  inflating: fine_tuned_model/tokenizer_config.json  
  inflating: fine_tuned_model/adapter_config.json  
  inflating: fine_tuned_model/tokenizer.json  
  inflating: fine_tuned_model/README.md  
  inflating: fine_tuned_model/checkpoint-182/special_tokens_map.json  
  inflating: fine_tuned_model/checkpoint-182/optimizer.pt  
  inflating: fine_tuned_model/checkpoint-182/adapter_model.safetensors  
  inflating: fine_tuned_model/checkpoint-182/tokenizer.model  
  inflating: fine_tuned_model/checkpoint-182/rng_state.pth  
  inflating: fine_tuned_model/checkpoint-182/chat_template.jinja  
  inflating: fine_tuned_model/checkpoint-182/tokenizer_config.

In [18]:
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import gradio as gr

# Load your fine-tuned Mental Illness Assistant model
model = AutoModelForCausalLM.from_pretrained("./fine_tuned_model")
tokenizer = AutoTokenizer.from_pretrained("./fine_tuned_model")

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def format_prompt(instruction):
    return f"### Instruction:\n{instruction}\n\n### Response:\n"

def chat_response(message, chat_history):
    prompt = format_prompt(message)
    output = pipe(prompt, max_new_tokens=300, do_sample=True, temperature=0.7)
    generated_text = output[0]['generated_text']
    response = generated_text.split("### Response:\n")[-1].strip()
    chat_history.append((message, response))
    return "", chat_history

# Gradio UI for Mental Illness Assistant
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 Mental Health Assistant\nAsk questions about mental illness, symptoms, and care.")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Question", placeholder="e.g., What does it mean to have anxiety?", lines=1)
    submit = gr.Button("Submit")
    clear = gr.Button("Clear Chat")

    submit.click(chat_response, [msg, chatbot], [msg, chatbot])
    msg.submit(chat_response, [msg, chatbot], [msg, chatbot])  # Also allow Enter key
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch()


Device set to use cuda:0
  chatbot = gr.Chatbot()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://68b985ed7d0567c2bf.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [1]:
# Upload CSV to Google Colab (from local machine)
from google.colab import files
import pandas as pd

# Method 1: Upload directly from your computer
uploaded = files.upload()

# Get the filename (assuming you upload just one file)
filename = next(iter(uploaded))

Saving Mental_Health_FAQ.csv to Mental_Health_FAQ.csv


In [2]:
import pandas as pd
df=pd.read_csv("Mental_Health_FAQ.csv")
print(df.columns)  # Should show: Index(['Questions', 'Answers'], dtype='object')


Index(['Question_ID', 'Instruction', 'Response'], dtype='object')
