# Assignment 3, Part F - Fine-tuning for a Mental Health Development Chatbot


In [1]:
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install trl peft accelerate bitsandbytes xformers

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-thzgnb0b/unsloth_62f9898e825a43808902c4c316f250e0
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-thzgnb0b/unsloth_62f9898e825a43808902c4c316f250e0
  Resolved https://github.com/unslothai/unsloth.git to commit a2f4c9793ecf829ede2cb64f2ca7a909ce3b0884
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading tyro-0.8.12-py3-none-any.whl.metadata (8.4 kB)
Collecting datasets>=2.16.0 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[cola

In [2]:
!pip install triton

Collecting triton
  Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.3 kB)
Downloading triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (209.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.4/209.4 MB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: triton
Successfully installed triton-3.0.0


### Import Necessary Libraries

In [3]:
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


### Load the Phi-3 Model

In [4]:
# Loader configuration for the base model.
max_seq_length = 2048  # any length may be chosen; Unsloth handles RoPE scaling internally
dtype = None           # None => auto-detect (float16 on Tesla T4/V100, bfloat16 on Ampere+)
load_in_4bit = True    # 4-bit quantization lowers memory usage

# Load the Phi-3 mini instruct checkpoint together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3-mini-4k-instruct",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

==((====))==  Unsloth 2024.9.post4: Fast Mistral patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/194 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

### Set up fine-tuning parameters

In [5]:
# Attach LoRA adapters to the base model.
# NOTE: this cell rebinds `model`; re-running it without reloading the base model
# passes the already-adapted model back into get_peft_model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,            # LoRA rank: any value > 0 (8, 16, 32, 64, 128 are the suggested choices)
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, 0 is the optimized setting
    bias="none",     # any value is supported, "none" is the optimized setting
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    use_gradient_checkpointing="unsloth",  # True or "unsloth"; the latter targets very long context
    use_rslora=False,    # rank-stabilized LoRA is supported but not enabled here
    loftq_config=None,   # LoftQ is supported but not enabled here
    random_state=3407,
)

Unsloth 2024.9.post4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


### Prepare training data

In [6]:
# Download the mental-health chatbot dataset; its 'train' split feeds the trainer.
data = load_dataset(path="heliosbrahma/mental_health_chatbot_dataset")

README.md:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

(…)-00000-of-00001-01391a60ef5c00d9.parquet:   0%|          | 0.00/102k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/172 [00:00<?, ? examples/s]

### Set up the trainer

In [8]:
# The mixed-precision mode has to agree with the dtype the model was loaded in.
# The loader cell uses dtype=None (auto-detect), i.e. bfloat16 on Ampere-or-newer
# GPUs (compute capability >= 8) and float16 on older cards such as the Tesla T4.
# Deriving the flags from the device keeps this cell working on both, instead of
# hard-coding fp16 and failing on a bf16-capable GPU.
use_bf16 = torch.cuda.get_device_capability()[0] >= 8

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=data['train'],
    dataset_text_field="text",  # dataset column that holds the training text
    max_seq_length=64,  # short training sequences keep this demo run cheap
    dataset_num_proc=1,  # single preprocessing process
    packing=True,  # pack short examples together so each sequence is fully used
    args=TrainingArguments(
        per_device_train_batch_size=1,  # smallest batch to minimize GPU memory usage
        gradient_accumulation_steps=1,  # no accumulation: effective batch size stays 1
        warmup_steps=0,  # no learning-rate warmup
        max_steps=20,  # very short run; overrides num_train_epochs
        # Small, conservative step size. Note that a lower learning rate does NOT
        # speed up convergence; with only 20 steps the adapters change very little.
        learning_rate=1e-5,
        fp16=not use_bf16,  # float16 mixed precision on pre-Ampere GPUs
        bf16=use_bf16,  # bfloat16 mixed precision where the hardware supports it
        logging_steps=1,  # log the loss at every step for quick feedback
        optim="adamw_torch",  # standard PyTorch AdamW optimizer
        weight_decay=0.01,  # light regularization
        lr_scheduler_type="constant",  # keep the learning rate fixed for the whole run
        seed=3407,
        output_dir="outputs",
    ),
)


Generating train split: 0 examples [00:00, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


### Show current memory stats

In [9]:
# Report the GPU's total memory and the peak memory reserved so far (before training).
bytes_per_gib = 1024 ** 3

gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / bytes_per_gib, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / bytes_per_gib, 3)

print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
2.283 GB of memory reserved.


### Train the model

In [10]:
# Run fine-tuning; the returned stats object is kept so its metrics can be printed afterwards.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 688 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 1
\        /    Total batch size = 1 | Total steps = 20
 "-____-"     Number of trainable parameters = 29,884,416


Step,Training Loss
1,2.3336
2,1.4996
3,2.3416
4,1.1373
5,2.8322
6,2.7812
7,1.9315
8,2.3354
9,1.1774
10,1.2616


### Print training metrics

In [11]:
# Show the summary metrics recorded for the training run (runtime, throughput, loss, epoch).
print(trainer_stats.metrics)

{'train_runtime': 28.1617, 'train_samples_per_second': 0.71, 'train_steps_per_second': 0.71, 'total_flos': 28818918604800.0, 'train_loss': 1.9645376682281495, 'epoch': 0.029069767441860465}


### Inference with fine-tuned model

In [12]:
def get_prompt(question, max_new_tokens=200):
    """Generate a completion for ``question`` with the fine-tuned model.

    Despite its name, this function returns generated text, not a prompt.

    Args:
        question: Raw prompt string. It is tokenized as-is; no chat template
            is applied here.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Defaults to 200, the value that used to be hard-coded; raise it
            when answers are cut off mid-sentence.

    Returns:
        A one-element list containing the decoded sequence, i.e. the prompt
        followed by the generated continuation. Special tokens are not
        stripped during decoding.
    """
    # Switch the model into Unsloth's faster inference mode (done on every call).
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([question], return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, use_cache=True)
    # batch_decode yields one string per generated sequence.
    decoded_output = tokenizer.batch_decode(outputs)
    return decoded_output

In [13]:
# Sample question for a quick qualitative check of the model's output.
prompt_template = "What are vitamin b12 and iron rich foods?"
# `sep` only separates multiple positional arguments; with a single argument it
# had no effect, so it is dropped to match the other prompt cells.
print(get_prompt(prompt_template))

['What are vitamin b12 and iron rich foods?\n\nVitamin B12 and iron are essential nutrients that play crucial roles in maintaining good health. Here are some foods that are rich in these nutrients:\n\nVitamin B12:\n\n1. Beef liver: Beef liver is an excellent source of vitamin B12, providing about 600% of the daily value (DV) per 3.5 ounces (100 grams).\n\n2. Clams: Clams are another good source of vitamin B12, with about 100% DV per 3 ounces (85 grams).\nough, and oysters.\n\n3. Fortified cereals: Many cereals are fortified with vitamin B12, making them a good option for those who do not consume animal products. Check the nutrition label to ensure the c']


In [14]:
# Second sample question; prints the one-element list returned by get_prompt.
# NOTE(review): this is a nutrition question, while the model was fine-tuned on a
# mental-health chatbot dataset - confirm these are the intended evaluation prompts.
prompt_template = "What are low calorie healthy food items (indian)?"
print(get_prompt(prompt_template))

['What are low calorie healthy food items (indian)?\n\nI am looking for low calorie healthy food items (indian) that I can eat. I am a vegetarian. I am looking for food items that are low in calories and are healthy. Can you provide me with a list of such food items?\n\nCertainlimpcalhealthyindianfooditems\n\nCertainly! Here are some low-calorie, healthy Indian food items that are suitable for vegetarians:\n\n1. Salad (Sabzi): A variety of vegetables like cucumber, tomato, lettuce, and spinach tossed with a light dressing.\n2. Raita: Yogurt mixed with cucumber, mint, and sometimes a small amount of sugar or salt.\n3. Chana Masala: Chickpeas cooked in a spicy tomato-based sauce.\n4. Palak Paneer: Spin']


In [15]:
# Third sample question; prints the one-element list returned by get_prompt.
prompt_template = "What are healthy snacks i can buy in HEB ?"
print(get_prompt(prompt_template))

['What are healthy snacks i can buy in HEB ?\n\nI am looking for healthy snacks that I can buy at HEB. HEB is a supermarket chain that offers a variety of food options, including healthy snacks. Here are some healthy snack options you can find at HEB:\n\n1. Fresh fruits: HEB offers a wide selection of fresh fruits, such as apples, bananas, oranges, and grapes. These fruits are a great source of vitamins, minerals, and fiber.\n\n2. Nuts: HEB sells a variety of nuts, such as almonds, cashews, and peanuts. Nuts are a good source of healthy fats, protein, and fiber.\n\n3. Greek yogurt: HEB offers a variety of Greek yogurt options, which are high in protein and probiotics.\n\n4. Hummus: HEB']


### Save the fine-tuned model

In [16]:
# Write the fine-tuned model and its tokenizer into the same directory.
# The tokenizer call comes last so the cell displays the saved tokenizer file paths.
lora_save_dir = "lora_model"
model.save_pretrained(lora_save_dir)
tokenizer.save_pretrained(lora_save_dir)

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.model',
 'lora_model/added_tokens.json',
 'lora_model/tokenizer.json')